Current File : //usr/lib/python3.6/site-packages/bs4/builder/__pycache__/_htmlparser.cpython-36.pyc
3

6]�3�@s�dZdgZddlmZyddlmZWn2ek
rXZzGdd�de�ZWYddZ[XnXddlZddl	Z	ej
dd�\ZZZ
edko�ed	ko�e
dkZedko�edkZedko�ed
kZddlmZmZmZmZmZddlmZmZdd
lmZmZmZdZGdd�de�ZGdd�de�Z edk�r�ed	k�r�e�r�ddl!Z!e!j"d�Z#e#e _#e!j"de!j$�Z%e%e_%ddlm&Z&m'Z'dd�Z(dd�Z)e(e_(e)e_)dZdS)zCUse the HTMLParser library to parse HTML files that aren't too bad.�HTMLParserTreeBuilder�)�
HTMLParser)�HTMLParseErrorc@seZdZdS)rN)�__name__�
__module__�__qualname__�rr�!/usr/lib/python3.6/_htmlparser.pyrsrN���)�CData�Comment�Declaration�Doctype�ProcessingInstruction)�EntitySubstitution�
UnicodeDammit)�HTML�HTMLTreeBuilder�STRICTzhtml.parserc@speZdZdd�Zdd�Zdd�Zddd	�Zdd
d�Zdd
�Zdd�Z	dd�Z
dd�Zdd�Zdd�Z
dd�ZdS)�BeautifulSoupHTMLParsercOstj|f|�|�g|_dS)N)r�__init__�already_closed_empty_element)�self�args�kwargsrrr	r9s	z BeautifulSoupHTMLParser.__init__cCstj|�dS)aiIn Python 3, HTMLParser subclasses must implement error(), although this
        requirement doesn't appear to be documented.

        In Python 2, HTMLParser implements error() as raising an exception.

        In any event, this method is called only on very strange markup and our best strategy
        is to pretend it didn't happen and keep going.
        N)�warnings�warn)r�msgrrr	�errorEs	zBeautifulSoupHTMLParser.errorcCs|j||dd�}|j|�dS)NF)�handle_empty_element)�handle_starttag�
handle_endtag)r�name�attrs�tagrrr	�handle_startendtagPsz*BeautifulSoupHTMLParser.handle_startendtagTc	Csli}x(|D] \}}|dkrd}|||<d}q
W|jj|dd|�}|rh|jrh|rh|j|dd�|jj|�dS)N�z""F)�check_already_closed)�soupr"Zis_empty_elementr#r�append)	rr$r%r!Z	attr_dict�key�value�	attrvaluer&rrr	r"[s
z'BeautifulSoupHTMLParser.handle_starttagcCs,|r||jkr|jj|�n|jj|�dS)N)r�remover*r#)rr$r)rrr	r#wsz%BeautifulSoupHTMLParser.handle_endtagcCs|jj|�dS)N)r*�handle_data)r�datarrr	r0�sz#BeautifulSoupHTMLParser.handle_datacCs�|jd�rt|jd�d�}n$|jd�r8t|jd�d�}nt|�}d}|dkr�xR|jjdfD]B}|sdqZyt|g�j|�}WqZtk
r�}zWYdd}~XqZXqZW|s�yt|�}Wn&t	t
fk
r�}zWYdd}~XnX|p�d}|j|�dS)N�x��X�zwindows-1252u�)�
startswith�int�lstripr*�original_encoding�	bytearray�decode�UnicodeDecodeError�chr�
ValueError�
OverflowErrorr0)rr$Z	real_namer1�encoding�errr	�handle_charref�s*

z&BeautifulSoupHTMLParser.handle_charrefcCs0tjj|�}|dk	r|}nd|}|j|�dS)Nz&%s)rZHTML_ENTITY_TO_CHARACTER�getr0)rr$�	characterr1rrr	�handle_entityref�s
z(BeautifulSoupHTMLParser.handle_entityrefcCs&|jj�|jj|�|jjt�dS)N)r*�endDatar0r)rr1rrr	�handle_comment�s
z&BeautifulSoupHTMLParser.handle_commentcCsN|jj�|jd�r&|td�d�}n|dkr2d}|jj|�|jjt�dS)NzDOCTYPE ZDOCTYPEr()r*rFr6�lenr0r)rr1rrr	�handle_decl�s

z#BeautifulSoupHTMLParser.handle_declcCsN|j�jd�r$t}|td�d�}nt}|jj�|jj|�|jj|�dS)NzCDATA[)�upperr6r
rHrr*rFr0)rr1�clsrrr	�unknown_decl�s
z$BeautifulSoupHTMLParser.unknown_declcCs&|jj�|jj|�|jjt�dS)N)r*rFr0r)rr1rrr	�	handle_pi�s
z!BeautifulSoupHTMLParser.handle_piN)T)T)rrrrr r'r"r#r0rBrErGrIrLrMrrrr	r7s

!


rc@s<eZdZdZdZeZeeegZ	dd�Z
d
dd�Zdd	�ZdS)rFTcOs,trtrd|d<trd|d<||f|_dS)NF�strictZconvert_charrefs)�CONSTRUCTOR_TAKES_STRICT� CONSTRUCTOR_STRICT_IS_DEPRECATED�"CONSTRUCTOR_TAKES_CONVERT_CHARREFS�parser_args)rrrrrr	r�s

zHTMLParserTreeBuilder.__init__NccsNt|t�r|dddfVdS||g}t||d|d�}|j|j|j|jfVdS)z�
        :return: A 4-tuple (markup, original encoding, encoding
        declared within markup, whether any characters had to be
        replaced with REPLACEMENT CHARACTER).
        NFT)Zis_html�exclude_encodings)�
isinstance�strr�markupr9Zdeclared_html_encodingZcontains_replacement_characters)rrVZuser_specified_encodingZdocument_declared_encodingrSZ
try_encodingsZdammitrrr	�prepare_markup�s
z$HTMLParserTreeBuilder.prepare_markupcCsr|j\}}t||�}|j|_y|j|�|j�Wn4tk
rf}ztjtd��|�WYdd}~XnXg|_	dS)Na*Python's built-in HTMLParser cannot parse the given document. This is not a bug in Beautiful Soup. The best solution is to install an external parser (lxml or html5lib), and use Beautiful Soup with that parser. See http://www.crummy.com/software/BeautifulSoup/bs4/doc/#installing-a-parser for help.)
rRrr*�feed�closerrr�RuntimeWarningr)rrVrr�parserrArrr	rX�s


zHTMLParserTreeBuilder.feed)NNN)
rrrZis_xmlZ	picklable�
HTMLPARSER�NAMErrZfeaturesrrWrXrrrr	r�s

zQ\s*((?<=[\'"\s])[^\s/>][^\s/=>]*)(\s*=+\s*(\'[^\']*\'|"[^"]*"|(?![\'"])[^>\s]*))?a�
  <[a-zA-Z][-.a-zA-Z0-9:_]*          # tag name
  (?:\s+                             # whitespace before attribute name
    (?:[a-zA-Z_][-.:a-zA-Z0-9_]*     # attribute name
      (?:\s*=\s*                     # value indicator
        (?:'[^']*'                   # LITA-enclosed value
          |\"[^\"]*\"                # LIT-enclosed value
          |[^'\">\s]+                # bare value
         )
       )?
     )
   )*
  \s*                                # trailing whitespace
)�tagfind�attrfindcCs8d|_|j|�}|dkr|S|j}|||�|_g}tj||d�}|sPtd��|j�}||d|�j�|_}x�||k�rP|j	r�t
j||�}ntj||�}|s�P|jddd�\}	}
}|
s�d}nb|dd�dko�|d
d�kn�s|dd�dk�o|dd�kn�r$|dd�}|�r4|j
|�}|j|	j�|f�|j�}qtW|||�j�}|dk�r�|j�\}
}d
|jk�r�|
|jjd
�}
t|j�|jjd
�}n|t|j�}|j	�r�|jd|||�dd�f�|j|||��|S|jd	��r|j||�n"|j||�||jk�r4|j|�|S)Nr�z#unexpected call to parse_starttag()rr
�'�"�>�/>�
z junk characters in start tag: %r����rgrg)rcrd)Z__starttag_textZcheck_for_whole_start_tag�rawdatar^�match�AssertionError�end�lowerZlasttagrNr_�attrfind_tolerant�groupZunescaper+�stripZgetpos�countrH�rfindr r0�endswithr'r"ZCDATA_CONTENT_ELEMENTS�set_cdata_mode)r�i�endposrhr%ri�kr&�mZattrname�restr.rk�lineno�offsetrrr	�parse_starttags\
*,


r{cCs$|j�|_tjd|jtj�|_dS)Nz</\s*%s\s*>)rlZ
cdata_elem�re�compile�IZinteresting)r�elemrrr	rsTs
rsT)*�__doc__�__all__Zhtml.parserrr�ImportErrorrA�	Exception�sysr�version_info�major�minor�releaserOrPrQZbs4.elementr
rrrrZ
bs4.dammitrrZbs4.builderrrrr\rrr|r}rm�VERBOSEZlocatestarttagendr^r_r{rsrrrr	�<module>sB"	2
7
Page not found – Hello World !