a
    åzeðC  ã                   @   s´   d dl Z d dlZd dlmZ dgZe  d¡Ze  d¡Ze  d¡Ze  d¡Z	e  d¡Z
e  d	¡Ze  d
¡Ze  d¡Ze  d¡Ze  de j¡Ze  d	¡Ze  d¡ZG dd„ dejƒZdS )é    N)ÚunescapeÚ
HTMLParserz[&<]z
&[a-zA-Z#]z%&([a-zA-Z][-.a-zA-Z0-9]*)[^a-zA-Z0-9]z)&#(?:[0-9]+|[xX][0-9a-fA-F]+)[^0-9a-fA-F]z	<[a-zA-Z]ú>z--\s*>z+([a-zA-Z][^\t\n\r\f />\x00]*)(?:\s|/(?!>))*z]((?<=[\'"\s/])[^\s/>][^\s/=>]*)(\s*=+\s*(\'[^\']*\'|"[^"]*"|(?![\'"])[^>\s]*))?(?:\s|/(?!>))*aF  
  <[a-zA-Z][^\t\n\r\f />\x00]*       # tag name
  (?:[\s/]*                          # optional whitespace before attribute name
    (?:(?<=['"\s/])[^\s/>][^\s/=>]*  # attribute name
      (?:\s*=+\s*                    # value indicator
        (?:'[^']*'                   # LITA-enclosed value
          |"[^"]*"                   # LIT-enclosed value
          |(?!['"])[^>\s]*           # bare value
         )
        \s*                          # possibly followed by a space
       )?(?:\s|/(?!>))*
     )*
   )?
  \s*                                # trailing whitespace
z#</\s*([a-zA-Z][-.a-zA-Z0-9:_]*)\s*>c                   @   sÜ   e Zd ZdZddœdd„Zdd„ Zdd	„ Zd
d„ ZdZdd„ Z	dd„ Z
dd„ Zdd„ Zdd„ Zd6dd„Zdd„ Zdd„ Zdd„ Zd d!„ Zd"d#„ Zd$d%„ Zd&d'„ Zd(d)„ Zd*d+„ Zd,d-„ Zd.d/„ Zd0d1„ Zd2d3„ Zd4d5„ ZdS )7r   )ZscriptÚstyleT)Úconvert_charrefsc                C   s   || _ |  ¡  d S ©N)r   Úreset)Úselfr   © r
   ú!/usr/lib/python3.9/html/parser.pyÚ__init__V   s    zHTMLParser.__init__c                 C   s(   d| _ d| _t| _d | _tj | ¡ d S )NÚ z???)ÚrawdataÚlasttagÚinteresting_normalÚinterestingÚ
cdata_elemÚ_markupbaseÚ
ParserBaser   ©r	   r
   r
   r   r   _   s
    zHTMLParser.resetc                 C   s   | j | | _ |  d¡ d S )Nr   )r   Úgoahead©r	   Údatar
   r
   r   Úfeedg   s    zHTMLParser.feedc                 C   s   |   d¡ d S )Né   )r   r   r
   r
   r   Úclosep   s    zHTMLParser.closeNc                 C   s   | j S r   )Ú_HTMLParser__starttag_textr   r
   r
   r   Úget_starttag_textv   s    zHTMLParser.get_starttag_textc                 C   s$   |  ¡ | _t d| j tj¡| _d S )Nz</\s*%s\s*>)Úlowerr   ÚreÚcompileÚIr   )r	   Úelemr
   r
   r   Úset_cdata_modez   s    
zHTMLParser.set_cdata_modec                 C   s   t | _d | _d S r   )r   r   r   r   r
   r
   r   Úclear_cdata_mode~   s    zHTMLParser.clear_cdata_modec                 C   sJ  | j }d}t|ƒ}||k rÚ| jrv| jsv| d|¡}|dk r | dt||d ƒ¡}|dkrpt d¡ 	||¡spqÚ|}n*| j
 	||¡}|r’| ¡ }n| jrœqÚ|}||k rÞ| jrÌ| jsÌ|  t|||… ƒ¡ n|  |||… ¡ |  ||¡}||kröqÚ|j}|d|ƒrJt ||¡r"|  |¡}	n†|d|ƒr:|  |¡}	nn|d|ƒrR|  |¡}	nV|d|ƒrj|  |¡}	n>|d	|ƒr‚|  |¡}	n&|d
 |k rÚ|  d¡ |d
 }	nqÚ|	dk r<|s¼qÚ| d|d
 ¡}	|	dk rú| d|d
 ¡}	|	dk r|d
 }	n|	d
7 }	| jr*| js*|  t|||	… ƒ¡ n|  |||	… ¡ |  ||	¡}q|d|ƒrðt ||¡}|r²| ¡ dd… }
|  |
¡ | ¡ }	|d|	d
 ƒs¢|	d
 }	|  ||	¡}qn<d||d … v rÚ|  |||d … ¡ |  ||d ¡}qÚq|d|ƒrt ||¡}|rN| d
¡}
|  |
¡ | ¡ }	|d|	d
 ƒs@|	d
 }	|  ||	¡}qt ||¡}|r¨|rÚ| ¡ ||d … krÚ| ¡ }	|	|kr’|}	|  ||d
 ¡}qÚn.|d
 |k rÚ|  d¡ |  ||d
 ¡}nqÚqq|r8||k r8| js8| jr| js|  t|||… ƒ¡ n|  |||… ¡ |  ||¡}||d … | _ d S )Nr   ú<ú&é"   z[\s;]z</ú<!--z<?z<!r   r   z&#é   éÿÿÿÿú;)r   Úlenr   r   ÚfindÚrfindÚmaxr   r    Úsearchr   ÚstartÚhandle_datar   Z	updateposÚ
startswithÚstarttagopenÚmatchÚparse_starttagÚparse_endtagÚparse_commentÚparse_piÚparse_html_declarationÚcharrefÚgroupÚhandle_charrefÚendÚ	entityrefÚhandle_entityrefÚ
incomplete)r	   r>   r   ÚiÚnÚjZampposr5   r3   ÚkÚnamer
   r
   r   r   …   sÂ    
ÿ












zHTMLParser.goaheadc                 C   s¢   | j }|||d … dkr$|  |¡S |||d … dkrB|  |¡S |||d …  ¡ dkr”| d|d ¡}|dkrvdS |  ||d	 |… ¡ |d
 S |  |¡S d S )Né   r(   é   z<![é	   z	<!doctyper   r*   r)   r   )r   r8   Zparse_marked_sectionr   r-   Úhandle_declÚparse_bogus_comment)r	   rB   r   Úgtposr
   r
   r   r:   ÿ   s    

z!HTMLParser.parse_html_declarationr   c                 C   sD   | j }| d|d ¡}|dkr"dS |r<|  ||d |… ¡ |d S )Nr   r)   r*   r   )r   r-   Úhandle_comment)r	   rB   Úreportr   Úposr
   r
   r   rK     s    zHTMLParser.parse_bogus_commentc                 C   sH   | j }t ||d ¡}|sdS | ¡ }|  ||d |… ¡ | ¡ }|S )Nr)   r*   )r   Úpicloser0   r1   Ú	handle_pir>   )r	   rB   r   r5   rD   r
   r
   r   r9      s    zHTMLParser.parse_pic                 C   sÞ  d | _ |  |¡}|dk r|S | j}|||… | _ g }t ||d ¡}| ¡ }| d¡ ¡  | _}||k r t	 ||¡}|s~q | ddd¡\}	}
}|
sœd }nZ|d d… d  krÀ|dd … ksên |d d… d  kræ|dd … krön n|dd… }|rt
|ƒ}| |	 ¡ |f¡ | ¡ }q`|||…  ¡ }|dvrž|  ¡ \}}d	| j v rz|| j  d	¡ }t| j ƒ| j  d	¡ }n|t| j ƒ }|  |||… ¡ |S | d
¡r¸|  ||¡ n"|  ||¡ || jv rÚ|  |¡ |S )Nr   r   r)   rH   ú'r*   ú")r   ú/>Ú
rT   )r   Úcheck_for_whole_start_tagr   Útagfind_tolerantr5   r>   r<   r   r   Úattrfind_tolerantr   ÚappendÚstripZgetposÚcountr,   r.   r2   ÚendswithÚhandle_startendtagÚhandle_starttagÚCDATA_CONTENT_ELEMENTSr#   )r	   rB   Úendposr   Úattrsr5   rE   ÚtagÚmÚattrnameÚrestZ	attrvaluer>   ÚlinenoÚoffsetr
   r
   r   r6   ,  sX    

&ÿ
ÿ



ÿ
zHTMLParser.parse_starttagc                 C   s¶   | j }t ||¡}|rª| ¡ }|||d … }|dkr>|d S |dkr~| d|¡rZ|d S | d|¡rjdS ||krv|S |d S |dkrŠdS |dv r–dS ||kr¢|S |d S td	ƒ‚d S )
Nr   r   ú/rT   r)   r*   r   z6abcdefghijklmnopqrstuvwxyz=/ABCDEFGHIJKLMNOPQRSTUVWXYZzwe should not get here!)r   Úlocatestarttagend_tolerantr5   r>   r3   ÚAssertionError)r	   rB   r   rc   rD   Únextr
   r
   r   rV   _  s.    z$HTMLParser.check_for_whole_start_tagc                 C   s  | j }t ||d ¡}|sdS | ¡ }t ||¡}|sÀ| jd urV|  |||… ¡ |S t ||d ¡}|s|||d … dkr†|d S |  	|¡S | 
d¡ ¡ }| d| ¡ ¡}|  |¡ |d S | 
d¡ ¡ }| jd urø|| jkrø|  |||… ¡ |S |  |¡ |  ¡  |S )Nr   r*   r)   rH   z</>r   )r   Ú	endendtagr0   r>   Ú
endtagfindr5   r   r2   rW   rK   r<   r   r-   Úhandle_endtagr$   )r	   rB   r   r5   rL   Z	namematchZtagnamer"   r
   r
   r   r7     s6    





zHTMLParser.parse_endtagc                 C   s   |   ||¡ |  |¡ d S r   )r^   rn   ©r	   rb   ra   r
   r
   r   r]   ©  s    zHTMLParser.handle_startendtagc                 C   s   d S r   r
   ro   r
   r
   r   r^   ®  s    zHTMLParser.handle_starttagc                 C   s   d S r   r
   )r	   rb   r
   r
   r   rn   ²  s    zHTMLParser.handle_endtagc                 C   s   d S r   r
   ©r	   rF   r
   r
   r   r=   ¶  s    zHTMLParser.handle_charrefc                 C   s   d S r   r
   rp   r
   r
   r   r@   º  s    zHTMLParser.handle_entityrefc                 C   s   d S r   r
   r   r
   r
   r   r2   ¾  s    zHTMLParser.handle_datac                 C   s   d S r   r
   r   r
   r
   r   rM   Â  s    zHTMLParser.handle_commentc                 C   s   d S r   r
   )r	   Zdeclr
   r
   r   rJ   Æ  s    zHTMLParser.handle_declc                 C   s   d S r   r
   r   r
   r
   r   rQ   Ê  s    zHTMLParser.handle_pic                 C   s   d S r   r
   r   r
   r
   r   Úunknown_declÍ  s    zHTMLParser.unknown_decl)r   )Ú__name__Ú
__module__Ú__qualname__r_   r   r   r   r   r   r   r#   r$   r   r:   rK   r9   r6   rV   r7   r]   r^   rn   r=   r@   r2   rM   rJ   rQ   rq   r
   r
   r
   r   r   >   s4   		z
3"()r   r   Zhtmlr   Ú__all__r    r   rA   r?   r;   r4   rP   ZcommentcloserW   rX   ÚVERBOSEri   rl   rm   r   r   r
   r
   r
   r   Ú<module>   s(   







ÿò

