o
    hʊ                     @   s  d dl Z d dlZd dlZd dlmZmZmZmZmZm	Z	m
Z
mZmZmZmZ d dlmZ d dlmZ d dlmZmZmZmZmZmZmZmZmZmZmZmZm Z m!Z!m"Z"m#Z#m$Z$m%Z%m&Z& d dl'm(Z( d dl)m*Z* d dl+m,Z, d d	l-m.Z.m/Z/ d d
l0m1Z1m2Z2 d dl3m4Z4 d dl5m6Z6 d dl7m8Z8m9Z9m:Z:m;Z;m<Z<m=Z=m>Z>m?Z?m@Z@mAZAmBZB eCeDZEG dd de*ZFG dd deFZGede
ee8ZHG dd deFeeH ZIG dd deIe8 ZJG dd deIe8 ZKG dd deIe8 ZLG dd deIe8 ZMdS )    N)BinaryIODictGenericListOptionalSequenceTextIOTupleTypeVarUnioncast)utils)ImageWriter)LAParamsLTAnnoLTCharLTComponentLTContainerLTCurveLTFigureLTImageLTItemLTLayoutContainerLTLineLTPageLTRectLTText	LTTextBoxLTTextBoxVerticalLTTextGroup
LTTextLineTextGroupElement)PDFColorSpace)PDFTextDevice)PDFValueError)PDFFontPDFUnicodeNotDefined)PDFGraphicStatePDFResourceManager)PDFPage)	PDFStream)AnyIOMatrixPathSegmentPointRectapply_matrix_ptapply_matrix_rectbbox2strencmake_compat_strmult_matrixc                   @   s(  e Zd ZU eed< eed< 		d0dededee	 ddfd	d
Z
dededdfddZdeddfddZdedededdfddZdeddfddZdededdfddZdededededee ddfd d!Zded"ed#ed$ed%ed&ed'ed(edefd)d*Zd"ed&edefd+d,Zd-eddfd.d/ZdS )1PDFLayoutAnalyzercur_itemctm   Nrsrcmgrpagenolaparamsreturnc                 C   s"   t | | || _|| _g | _d S N)r#   __init__r;   r<   _stackselfr:   r;   r<    rC   K/var/www/html/govbot/env/lib/python3.10/site-packages/pdfminer/converter.pyr?   E   s   
zPDFLayoutAnalyzer.__init__pagec                 C   sB   t ||j\}}}}ddt|| t|| f}t| j|| _d S )Nr   )r1   mediaboxabsr   r;   r7   )rB   rE   r8   x0y0x1y1rF   rC   rC   rD   
begin_pageP   s   zPDFLayoutAnalyzer.begin_pagec                 C   sl   | j rJ tt| j t| jtsJ tt| j| jd ur'| j| j |  j	d7  _	| 
| j d S )Nr9   )r@   strlen
isinstancer7   r   typer<   analyzer;   receive_layout)rB   rE   rC   rC   rD   end_pageU   s   
zPDFLayoutAnalyzer.end_pagenamebboxmatrixc                 C   s(   | j | j t||t|| j| _d S r>   )r@   appendr7   r   r5   r8   )rB   rT   rU   rV   rC   rC   rD   begin_figure]   s   zPDFLayoutAnalyzer.begin_figure_c                 C   s@   | j }t| j tsJ tt| j | j | _ | j | d S r>   )r7   rO   r   rM   rP   r@   popadd)rB   rY   figrC   rC   rD   
end_figurea   s   zPDFLayoutAnalyzer.end_figurestreamc                 C   sR   t | jtsJ tt| jt||| jj| jj| jj| jj	f}| j
| d S r>   )rO   r7   r   rM   rP   r   rH   rI   rJ   rK   r[   )rB   rT   r^   itemrC   rC   rD   render_imageg   s   zPDFLayoutAnalyzer.render_imagegstatestrokefillevenoddpathc                    s|  d dd  D }|dd dkrdS |ddkr;td|D ]} |d|d }||||| q!dS  fd	d
 D }	fdd
|	D }
dd
  D }fdd
 D }dd
 t||D }t|dkr|dd dkr|
d |
d kr|dd d }|
	  |dv rt
|j|
d |
d ||||j|j||jd
}j| dS |dv r&|
\\}}\}}\}}\}}}|
d |
d k}||ko||ko||ko||kp||ko||ko||ko||k}|r|rt|jg |
d |
d R ||||j|j||j	}j| dS t|j|
||||j|j||j	}j| dS t|j|
||||j|j||j	}j| dS )z@Paint paths described in section 4.4 of the PDF reference manual c                 s   s    | ]}|d  V  qdS )r   NrC   ).0xrC   rC   rD   	<genexpr>y   s    z/PDFLayoutAnalyzer.paint_path.<locals>.<genexpr>Nr9   mzm[^m]+r   c                    s:   g | ]}t t|d  dkr|dd n d  dd qS )r   hN)r   r.   )rg   p)re   rC   rD   
<listcomp>   s    .z0PDFLayoutAnalyzer.paint_path.<locals>.<listcomp>c                    s   g | ]}t  j|qS rC   )r0   r8   )rg   ptrB   rC   rD   rn          c                 S   s   g | ]}t |d  qS )r   )rM   rg   	operationrC   rC   rD   rn      rq   c              	      s8   g | ]} fd dt |ddd |ddd D qS )c                    s(   g | ]\}}t  jt|t|fqS rC   )r0   r8   float)rg   operand1operand2rp   rC   rD   rn      s    z;PDFLayoutAnalyzer.paint_path.<locals>.<listcomp>.<listcomp>r9   N   )ziprr   rp   rC   rD   rn      s    
c                 S   s"   g | ]\}}t t|g|R qS rC   )r   r-   )rg   orm   rC   rC   rD   rn      s       rl   lhrk   >   mlmlh)original_pathdashing_style>   mlllhmllll   rw   )joincountrefinditerstartend
paint_pathrx   rN   rZ   r   	linewidthscolorncolordashr7   r[   r   r   )rB   ra   rb   rc   rd   re   shaperj   subpathraw_ptspts	operatorstransformed_pointstransformed_pathlinerH   rI   rJ   rK   x2y2x3y3rY   is_closed_loophas_square_coordinatesrectcurverC   )re   rB   rD   r   p   s   	

,
zPDFLayoutAnalyzer.paint_pathfontfontsizescalingrisecidncsgraphicstatec	                 C   s   z| |}	t|	tsJ tt|	W n ty#   | ||}	Y nw ||}
||}t||||||	|
|||
}| j	
| |jS r>   )	to_unichrrO   rM   rP   r&   handle_undefined_char
char_width	char_dispr   r7   r[   adv)rB   rV   r   r   r   r   r   r   r   text	textwidthtextdispr_   rC   rC   rD   render_char   s,   


zPDFLayoutAnalyzer.render_charc                 C   s   t d|| d| S )Nzundefined: %r, %rz(cid:%d))logdebug)rB   r   r   rC   rC   rD   r     s   z'PDFLayoutAnalyzer.handle_undefined_charltpagec                 C      d S r>   rC   rB   r   rC   rC   rD   rR     s   z PDFLayoutAnalyzer.receive_layoutr9   N) __name__
__module____qualname__r   __annotations__r,   r(   intr   r   r?   r)   rL   rS   rM   r/   rX   r]   r*   r`   r'   boolr   r-   r   r%   rt   r"   r   r   r   rR   rC   rC   rC   rD   r6   A   sj   
 
	
{	

!r6   c                	   @   sP   e Zd Z		ddededee ddfddZd	eddfd
dZ	defddZ
dS )PDFPageAggregatorr9   Nr:   r;   r<   r=   c                 C   s   t j| |||d d | _d S N)r;   r<   )r6   r?   resultrA   rC   rC   rD   r?     s   
zPDFPageAggregator.__init__r   c                 C   s
   || _ d S r>   r   r   rC   rC   rD   rR     s   
z PDFPageAggregator.receive_layoutc                 C   s   | j d usJ | j S r>   r   rp   rC   rC   rD   
get_result!  s   zPDFPageAggregator.get_resultr   )r   r   r   r(   r   r   r   r?   r   rR   r   rC   rC   rC   rD   r     s    
	r   IOTypec                   @   sP   e Zd Z			ddededededee d	dfd
dZ	e
ded	efddZdS )PDFConverterutf-8r9   Nr:   outfpcodecr;   r<   r=   c                 C   s0   t j| |||d || _|| _| | j| _d S r   )r6   r?   r   r   _is_binary_streamoutfp_binary)rB   r:   r   r   r;   r<   rC   rC   rD   r?   +  s   zPDFConverter.__init__c                 C   sR   dt | ddv r
dS t| drdS t| tjrdS t| tjs%t| tjr'dS dS )z"Test if an stream is binary or notbmoderf   TF)getattrhasattrrO   ioBytesIOStringIO
TextIOBase)r   rC   rC   rD   r   8  s   
zPDFConverter._is_binary_stream)r   r9   N)r   r   r   r(   r   rM   r   r   r   r?   staticmethodr+   r   r   rC   rC   rC   rD   r   *  s&    
r   c                       s   e Zd Z					d dedededed	ee d
e	dee
 ddf fddZdeddfddZdeddfddZdededdfddZdede	de	de	dee ddfddZ  ZS )!TextConverterr   r9   NFr:   r   r   r;   r<   
showpagenoimagewriterr=   c                    s&   t  j|||||d || _|| _d S )Nr   r;   r<   )superr?   r   r   )rB   r:   r   r   r;   r<   r   r   	__class__rC   rD   r?   I  s   

zTextConverter.__init__r   c                 C   sF   t || jd}| jrtt| j|  d S tt	| j| d S )Nignore)
r   compatible_encode_methodr   r   r   r   r   writeencoder   rB   r   rC   rC   rD   
write_textW  s   zTextConverter.write_textr   c                    sD   dt dd f fdd jrd|j   | d d S )Nr_   r=   c                    sz   t | tr| D ]} | qnt | tr|   t | tr'd d S t | tr9jd ur;j|  d S d S d S )N
)	rO   r   r   r   get_textr   r   r   export_imager_   childrenderrB   rC   rD   r   _  s   





z,TextConverter.receive_layout.<locals>.renderzPage %s
)r   r   r   pageidr   rC   r   rD   rR   ^  s
   zTextConverter.receive_layoutrT   r^   c                 C   s    | j d urt| || d S d S r>   )r   r   r`   )rB   rT   r^   rC   rC   rD   r`   s  s   
zTextConverter.render_imagera   rb   rc   rd   re   c                 C   r   r>   rC   )rB   ra   rb   rc   rd   re   rC   rC   rD   r   w  s   zTextConverter.paint_path)r   r9   NFN)r   r   r   r(   r+   rM   r   r   r   r   r   r?   r   r   rR   r*   r`   r'   r   r-   r   __classcell__rC   rC   r   rD   r   H  sN    	r   c                    @   s  e Zd ZdddddddZddd	Z	
											dMdedededede	e
 dededededede	e dede	eeef  de	eeef  d dfd!d"Zd#ed dfd$d%ZdNd&d'ZdNd(d)Zd#ed dfd*d+Zd,ed-ed.ed/ed0ed1ed dfd2d3Zd,ed-ed4ed dfd5d6Zd4ed-ed.ed/ed0ed1ed dfd7d8Zd,ed#ed.ed/ed9ed dfd:d;Z	<dOd,ed-ed.ed/ed0ed1ed=ed dfd>d?Zd,ed dfd@dAZd#edBedCed dfdDdEZdNdFdGZdHed dfdIdJZdNdKdLZ dS )PHTMLConverteryellowmagentacyanredblackgray)figuretextlinetextbox	textgroupr   rE   blue)r   charr   r9   N      ?normalT2   r   r:   r   r   r;   r<   scale	fontscale
layoutmoder   
pagemarginr   r   rect_colorstext_colorsr=   c                 C   s   t j| |||||d | jr| jstd| js| jrtd|d u r'ddi}|d u r0ddd}|| _|| _|| _|	| _|
| _	|| _
|| _|| _|rX| j| j | j| j | j	| _d | _g | _|   d S )Nr   )Codec is required for a binary I/O outputz1Codec must not be specified for a text I/O outputr   r   r   )r   rE   )r   r?   r   r   r$   r   r   r   r   r   r   r   r   updateRECT_COLORSTEXT_COLORS_yoffset_font
_fontstackwrite_header)rB   r:   r   r   r;   r<   r   r   r   r   r   r   r   r   r   rC   rC   rD   r?     s>   

zHTMLConverter.__init__r   c                 C   :   | j rtt| j|| j  d S tt| j| d S r>   r   r   r   r   r   r   r   r   rC   rC   rD   r        zHTMLConverter.writec                 C   s8   |  d | jrd| j }nd}|  | |  d d S )Nz<html><head>
zA<meta http-equiv="Content-Type" content="text/html; charset=%s">
z5<meta http-equiv="Content-Type" content="text/html">
z</head><body>
)r   r   )rB   srC   rC   rD   r    s   

zHTMLConverter.write_headerc                 C   s<   dd t d| jD }dd| }| | | d d S )Nc                 S   s   g | ]}d | d| dqS )z
<a href="#z">z</a>rC   )rg   irC   rC   rD   rn     s    z.HTMLConverter.write_footer.<locals>.<listcomp>r9   z8<div style="position:absolute; top:0px;">Page: %s</div>
z, z</body></html>
)ranger;   r   r   )rB   
page_linksr  rC   rC   rD   write_footer  s   
zHTMLConverter.write_footerc                 C   s   |  t| d S r>   )r   r3   r   rC   rC   rD   r     s   zHTMLConverter.write_textcolorborderwidthrh   ywrk   c           	      C   sX   | j |}|d ur*d|||| j | j| | j || j || j f }| | d S d S )Nzn<span style="position:absolute; border: %s %dpx solid; left:%dpx; top:%dpx; width:%dpx; height:%dpx;"></span>
)r   getr   r   r   )	rB   r  r  rh   r  r  rk   color2r  rC   rC   rD   
place_rect  s   	zHTMLConverter.place_rectr_   c                 C   s    |  |||j|j|j|j d S r>   )r  rH   rK   widthheight)rB   r  r  r_   rC   rC   rD   place_border  s    zHTMLConverter.place_borderc           	      C   s^   | j d ur-| j |}dt|||| j | j| | j || j || j f }| | d S d S )Nzd<img src="%s" border="%d" style="position:absolute; left:%dpx; top:%dpx;" width="%d" height="%d" />
)r   r   r3   r   r   r   )	rB   r_   r  rh   r  r  rk   rT   r  rC   rC   rD   place_image  s   
	zHTMLConverter.place_imagesizec                 C   sh   | j |}|d ur2d||| j | j| | j || j | j f }| | | | | d d S d S )NzP<span style="position:absolute; color:%s; left:%dpx; top:%dpx; font-size:%dpx;"></span>
)r   r  r   r   r   r   r   )rB   r  r   rh   r  r  r  r  rC   rC   rD   
place_text  s   


zHTMLConverter.place_textFalsewriting_modec           	   	   C   sV   | j | j d | _d||||| j | j| | j || j || j f }| | d S )Nzv<div style="position:absolute; border: %s %dpx solid; writing-mode:%s; left:%dpx; top:%dpx; width:%dpx; height:%dpx;">)r  rW   r  r   r   r   )	rB   r  r  rh   r  r  rk   r  r  rC   rC   rD   	begin_div0  s   
zHTMLConverter.begin_divc                 C   s.   | j d ur
| d | j | _ | d d S )N</span>z</div>)r  r   r  rZ   )rB   r  rC   rC   rD   end_divL  s   

zHTMLConverter.end_divfontnamer   c                 C   sf   ||f}|| j kr,| j d ur| d |dd }| d||| j | j f  || _ | | d S )Nr  +z.<span style="font-family: %s; font-size:%dpx">)r  r   splitr   r   r   )rB   r   r  r   r   fontname_without_subset_tagrC   rC   rD   put_textR  s   


zHTMLConverter.put_textc                 C      |  d d S )Nz<br>r   rp   rC   rC   rD   put_newline`     zHTMLConverter.put_newliner   c                    sV   dt ttf dd ffdddtdd f fdd  |  jj7  _d S )Nr_   r=   c                    s2   t | tr dd|  | D ]}| qd S d S )Nr   r9   )rO   r   r  r   rB   
show_grouprC   rD   r*  d  s   

z0HTMLConverter.receive_layout.<locals>.show_groupc              
      s  t | trO j| j7  _dd|  jr3dj| j j   d| j d| j d | D ]} | q5| j	d urK| j	D ]}| qDd S d S t | t
r]dd|  d S t | trdd| j| j| j| j | D ]} | qrd d S t | tr| d| j| j| j| j d S jd	krt | trd
d|  | D ]} | qd S t | tr؈dd|  dt| jd | j| jd | D ]} | qd S t | trdd|  d|  | j| j| j d S d S t | tr| D ]} | qjdkr  d S d S t | tr;dd| j| j| j| j|   | D ]} | q,d d S t | trRt| j} |  || j d S t | t!ra"|   d S d S )NrE   r9   z*<div style="position:absolute; top:%dpx;">z	<a name="z">Page z</a></div>
r   r   exactr   r      r   loose)#rO   r   r   rK   r  r   r   r   r   groupsr   r   r  rH   r  r  r  r   r  r   r    r   r  rM   indexr   r   r  r'  get_writing_moder4   r  r$  r   r   )r_   r   groupr  r   rB   r*  rC   rD   r   j  s   








 





	
	
z,HTMLConverter.receive_layout.<locals>.render)r   r   r!   r   r   r   r   rC   r2  rD   rR   c  s    LzHTMLConverter.receive_layoutc                 C      |    d S r>   r  rp   rC   rC   rD   close     zHTMLConverter.close)r   r9   Nr9   r   r   Tr   Nr   NNr=   N)r  )!r   r   r   r   r   r(   r+   rM   r   r   r   rt   r   r   r   r?   r   r  r  r   r  r   r  r   r  r  r  r  r$  r'  r   rR   r5  rC   rC   rC   rD   r     s    
		

5




 	

Vr   c                   @   s   e Zd ZedZ					ddededed	e	d
e
e de
e deddfddZdeddfddZdddZdddZdeddfddZdeddfddZdddZdS ) XMLConverterz[ ---]r   r9   NFr:   r   r   r;   r<   r   stripcontrolr=   c                 C   sD   t j| |||||d | j| j krtd|| _|| _|   d S )Nr   r   )r   r?   r   r   r$   r   r9  r  )rB   r:   r   r   r;   r<   r   r9  rC   rC   rD   r?     s   

zXMLConverter.__init__r   c                 C   r  r>   r  r   rC   rC   rD   r     r  zXMLConverter.writec                 C   s0   | j r| d| j   n| d | d d S )Nz%<?xml version="1.0" encoding="%s" ?>
z<?xml version="1.0" ?>
z<pages>
r   r   rp   rC   rC   rD   r    s   
zXMLConverter.write_headerc                 C   r%  )Nz	</pages>
r&  rp   rC   rC   rD   r    r(  zXMLConverter.write_footerc                 C   s&   | j r
| jd|}| t| d S Nrf   )r9  CONTROLsubr   r3   r   rC   rC   rD   r     s   zXMLConverter.write_textr   c                    s>   dt dd ffdddt dd f fdd  | d S )Nr_   r=   c                    sj   t | tr d| jt| jf  d S t | tr3 dt| j  | D ]}| q% d d S d S )Nz<textbox id="%d" bbox="%s" />
z<textgroup bbox="%s">
z</textgroup>
)rO   r   r   r/  r2   rU   r   r   r)  rC   rD   r*    s   


z/XMLConverter.receive_layout.<locals>.show_groupc                    s  t | tr?d| jt| j| jf }| | D ]} | q| jd ur8d | jD ]}| q,d d d S t | trUd| j	t| jf }| d S t | t
rkd| j	t| jf }| d S t | trd| j	t| j|  f }| d S t | trd| j d	t| j d
}| | D ]} | qd d S t | trʈdt| j  | D ]} | qd d S t | trd}t | trd}d| jt| j|f }| | D ]} | qd d S t | tr%dt| jt| j| jj| jj| jf }| |   d d S t | tr6d|    d S t | trejd urXj| }dt|| j | j!f  d S d| j | j!f  d S J t"d| f)Nz%<page id="%s" bbox="%s" rotate="%d">
z	<layout>
z
</layout>
z</page>
z"<line linewidth="%d" bbox="%s" />
z"<rect linewidth="%d" bbox="%s" />
z+<curve linewidth="%d" bbox="%s" pts="%s"/>
z<figure name="z" bbox="z">
z
</figure>
z<textline bbox="%s">
z</textline>
rf   z wmode="vertical"z<textbox id="%d" bbox="%s"%s>
z</textbox>
zD<text font="%s" bbox="%s" colourspace="%s" ncolour="%s" size="%.3f">z</text>
z<text>%s</text>
z*<image src="%s" width="%d" height="%d" />
z!<image width="%d" height="%d" />
F	Unhandled)#rO   r   r   r2   rU   rotater   r.  r   r   r   r   get_ptsr   rT   r    r   r   r/  r   r3   r  r   r   r   r  r   r   r   r   r   r   r  r  rM   )r_   r  r   r1  wmoderT   r2  rC   rD   r     s   





















z+XMLConverter.receive_layout.<locals>.renderr   r   rC   r2  rD   rR     s   \zXMLConverter.receive_layoutc                 C   r3  r>   r4  rp   rC   rC   rD   r5  [  r6  zXMLConverter.close)r   r9   NNFr7  )r   r   r   r   compiler<  r(   r+   rM   r   r   r   r   r   r?   r   r  r  r   r   rR   r5  rC   rC   rC   rD   r8    s<    
	


kr8  c                   @   s   e Zd ZdZedZ				d#deded	e	d
e
dee defddZdede	fddZde	ddfddZd$ddZd$ddZde	ddfddZd$ddZdeddfdd Zd$d!d"ZdS )%HOCRConverterzKExtract an hOCR representation from explicit text information within a PDF.z[\x00-\x08\x0b-\x0c\x0e-\x1f]utf8r9   NFr:   r   r   r;   r<   r9  c                 C   s.   t j| |||||d || _d| _|   d S )Nr   F)r   r?   r9  within_charsr  )rB   r:   r   r   r;   r<   r9  rC   rC   rD   r?   r  s   	zHOCRConverter.__init__rU   r=   c           
      C   s\   |\}}}}t |}t | jd | }t |}t | jd | }	d| d| d| d|	 S )Nrz   zbbox  )r   	page_bbox)
rB   rU   in_x0in_y0in_x1in_y1out_x0out_y0out_x1out_y1rC   rC   rD   	bbox_repr  s   zHOCRConverter.bbox_reprr   c                 C   s>   | j r|| j }tt| j| d S tt| j| d S r>   )r   r   r   r   r   r   r   )rB   r   encoded_textrC   rC   rD   r     s   zHOCRConverter.writec                 C   sl   | j r| d| j   n| d | d | d | d | d | d | d | d	 d S )
NzQ<html xmlns='http://www.w3.org/1999/xhtml' xml:lang='en' lang='en' charset='%s'>
zD<html xmlns='http://www.w3.org/1999/xhtml' xml:lang='en' lang='en'>
z<head>
z<title></title>
zE<meta http-equiv='Content-Type' content='text/html;charset=utf-8' />
zA<meta name='ocr-system' content='pdfminer.six HOCR Converter' />
zR  <meta name='ocr-capabilities' content='ocr_page ocr_block ocr_line ocrx_word'/>
z</head>
z<body>
r:  rp   rC   rC   rD   r    s,   


zHOCRConverter.write_headerc                 C   s   |  d |  d d S )Nz0<!-- comment in the following line to debug -->
zD<!--script src='https://unpkg.com/hocrjs'></script--></body></html>
r&  rp   rC   rC   rD   r    s   
zHOCRConverter.write_footerc                 C   s"   | j r
| jd|}| | d S r;  )r9  r<  r=  r   r   rC   rC   rD   r     s   zHOCRConverter.write_textc                 C   sn   t | jdkr2d}d| jv rd}d| jv r|d7 }| d| j| j|| | j| j| j| j f  d| _d S )	Nr   rf   Italiczfont-style: italic; Boldzfont-weight: bold; zg<span style='font:"%s"; font-size:%d; %s' class='ocrx_word' title='%s; x_font %s; x_fsize %d'>%s</span>F)	rN   working_textworking_fontr   working_sizerQ  working_bboxstriprF  )rB   bold_and_italic_stylesrC   rC   rD   
write_word  s&   



zHOCRConverter.write_wordr   c                    s$   dt dd f fdd  | d S )Nr_   r=   c                    s  j rt| tr  t| tr3| j_d| j	| jf  | D ]} | q%d d S t| t
rSd	| j  | D ]} | qEd d S t| trvd| j	| jf  | D ]} | qhd d S t| trj sd_ |  _| j_| j_| j_d S t|   dkr  |   d S jd | jd ksj| jksj| jkrш  | j_| j_| j_ j|  7  _jd jd | jd	 jd
 f_d S d S )Nz*<div class='ocr_page' id='%s' title='%s'>
z</div>
z"<span class='ocr_line' title='%s'>r  z+<div class='ocr_block' id='%d' title='%s'>
Tr   r9   rw   rz   )rF  rO   r   r[  r   rU   rH  r   r   rQ  r    r   r/  r   r   rU  rX  r  rV  r  rW  rN   rY  )r_   r   
child_liner   rC   rD   r     sh   








z,HOCRConverter.receive_layout.<locals>.renderrB  r   rC   r   rD   rR     s   7zHOCRConverter.receive_layoutc                 C   r3  r>   r4  rp   rC   rC   rD   r5    r6  zHOCRConverter.close)rE  r9   NFr7  )r   r   r   __doc__r   rC  r<  r(   r+   rM   r   r   r   r   r?   r/   rQ  r   r  r  r   r[  r   rR   r5  rC   rC   rC   rD   rD  _  s8    

	


:rD  )Nr   loggingr   typingr   r   r   r   r   r   r   r	   r
   r   r   pdfminerr   pdfminer.imager   pdfminer.layoutr   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r    r!   pdfminer.pdfcolorr"   pdfminer.pdfdevicer#   pdfminer.pdfexceptionsr$   pdfminer.pdffontr%   r&   pdfminer.pdfinterpr'   r(   pdfminer.pdfpager)   pdfminer.pdftypesr*   pdfminer.utilsr+   r,   r-   r.   r/   r0   r1   r2   r3   r4   r5   	getLoggerr   r   r6   r   r   r   r   r   r8  rD  rC   rC   rC   rD   <module>   s8    4T4
 T:  = #