
    MhE                    L   d Z ddlmZ dZdgZddlmZ ddlmZm	Z	m
Z
mZmZmZmZmZmZmZmZ ddlmZmZmZmZmZmZ ddlmZmZ dd	lmZmZm Z m!Z! dd
l"m#Z# erddl$m%Z% ddlm&Z& ddl'm(Z(m)Z)m*Z* dZ+e	ee,e,f         e,e,gdf         Z- G d dee          Z. G d de           Z/dS )zCUse the HTMLParser library to parse HTML files that aren't too bad.    )annotationsMITHTMLParserTreeBuilder)
HTMLParser)AnyCallablecastDictIterableListOptionalTYPE_CHECKINGTupleTypeUnion)AttributeDictCDataCommentDeclarationDoctypeProcessingInstruction)EntitySubstitutionUnicodeDammit)DetectsXMLParsedAsHTMLHTMLHTMLTreeBuilderSTRICTParserRejectedMarkup)BeautifulSoup)NavigableString)	_Encoding
_Encodings
_RawMarkupzhtml.parserNc                      e Zd ZU dZded<   dZded<   	 edd(dZded<   ded<   ded<   d)dZd*dZ	 d+d,dZ	d+d-dZ
d.d Zd/d!Zd/d"Zd.d#Zd.d$Zd.d%Zd.d&Zd'S )0BeautifulSoupHTMLParserreplacestrREPLACEignoreIGNOREon_duplicate_attributesoupr    argsr   r-   &Union[str, _DuplicateAttributeHandler]kwargsc                   || _         || _        |j        j        | _        t	          j        | g|R i | g | _        |                                  d S N)r.   r-   builderattribute_dict_classr   __init__already_closed_empty_element_initialize_xml_detector)selfr.   r-   r/   r1   s        W/var/www/html/test/jupyter/venv/lib/python3.11/site-packages/bs4/builder/_htmlparser.pyr6   z BeautifulSoupHTMLParser.__init__T   sc     	&<#$(L$E!D242226222 -/)%%'''''    z	List[str]r7   messagereturnNonec                     t          |          r3   r   )r9   r<   s     r:   errorzBeautifulSoupHTMLParser.erroro   s     #7+++r;   nameattrsList[Tuple[str, Optional[str]]]c                `    |                      ||d           |                     |           dS )zHandle an incoming empty-element tag.

        html.parser only calls this method when the markup looks like
        <tag/>.
        F)handle_empty_elementN)handle_starttaghandle_endtag)r9   rA   rB   s      r:   handle_startendtagz*BeautifulSoupHTMLParser.handle_startendtag   s9     	T5uEEE4     r;   TrE   boolc                4   |                                  }|D ]Y\  }}|d}||v rG| j        }|| j        k    r |d| j        fv r|||<   1t	          t
          |          } ||||           T|||<   Z| j        j        j        r| 	                                \  }}	ndx}}	| j        
                    |dd|||	          }
|
r:|
j        r3|r1|                     |d           | j                            |           | j        |                     |           dS dS )zHandle an opening tag, e.g. '<tag>'

        :param handle_empty_element: True if this tag is known to be
            an empty-element tag (i.e. there is not expected to be any
            closing tag).
        N )
sourceline	sourceposF)check_already_closed)r5   r-   r+   r)   r	   _DuplicateAttributeHandlerr.   r4   store_line_numbersgetposrF   is_empty_elementrG   r7   append_root_tag_name_root_tag_encountered)r9   rA   rB   rE   	attr_dictkeyvalueon_duperL   rM   tags              r:   rF   z'BeautifulSoupHTMLParser.handle_starttag   s|    $(#<#<#>#>	 	' 	'JC }i 5dk))t| 444%*IcNN"#=wGGGGIsE2222!&	# 9/ 	*$(KKMM!J		%))Ji''$iJ) ( 
 
  	;3' 	;,@ 	; t%@@@ -44T:::&&&t,,,,, '&r;   rN   c                    |r%|| j         v r| j                             |           dS | j                            |           dS )zHandle a closing tag, e.g. '</tag>'

        :param name: A tag name.
        :param check_already_closed: True if this tag is expected to
           be the closing portion of an empty-element tag,
           e.g. '<tag></tag>'.
        N)r7   remover.   rG   )r9   rA   rN   s      r:   rG   z%BeautifulSoupHTMLParser.handle_endtag   sT       	*DD,M$M$M
 -44T:::::I##D)))))r;   datac                :    | j                             |           dS )z4Handle some textual data that shows up between tags.N)r.   handle_datar9   r]   s     r:   r_   z#BeautifulSoupHTMLParser.handle_data   s    	d#####r;   c                .   |                     d          r$t          |                    d          d          }nH|                     d          r$t          |                    d          d          }nt          |          }d}|dk     rH| j        j        dfD ]9}|s	 t          |g                              |          }*# t          $ r Y 6w xY w|s(	 t          |          }n# t          t          f$ r Y nw xY w|pd}|                     |           dS )zHandle a numeric character reference by converting it to the
        corresponding Unicode character and treating it as textual
        data.

        :param name: Character number, possibly in hexadecimal.
        x   XN   zwindows-1252u   �)
startswithintlstripr.   original_encoding	bytearraydecodeUnicodeDecodeErrorchr
ValueErrorOverflowErrorr_   )r9   rA   	real_namer]   encodings        r:   handle_charrefz&BeautifulSoupHTMLParser.handle_charref   sM    ??3 	"DKK,,b11II__S!! 	"DKK,,b11IID		Is?? "Y8.I   $i[1188BBDD)   D 	9~~.   22s$   #C
CCC% %C98C9c                    t           j                            |          }||}nd|z  }|                     |           dS )zHandle a named entity reference by converting it to the
        corresponding Unicode character(s) and treating it as textual
        data.

        :param name: Name of the entity reference.
        Nz&%s)r   HTML_ENTITY_TO_CHARACTERgetr_   )r9   rA   	characterr]   s       r:   handle_entityrefz(BeautifulSoupHTMLParser.handle_entityref
  sL     '?CCDII	 DD 4<Dr;   c                    | j                                          | j                             |           | j                             t                     dS )zOHandle an HTML comment.

        :param data: The text of the comment.
        N)r.   endDatar_   r   r`   s     r:   handle_commentz&BeautifulSoupHTMLParser.handle_comment  sJ    
 			d###	'"""""r;   c                    | j                                          |t          d          d         }| j                             |           | j                             t                     dS )zYHandle a DOCTYPE declaration.

        :param data: The text of the declaration.
        zDOCTYPE N)r.   ry   lenr_   r   r`   s     r:   handle_declz#BeautifulSoupHTMLParser.handle_decl&  s`    
 		C
OO%%&	d###	'"""""r;   c                :   |                                                     d          rt          }|t          d          d         }nt          }| j                                         | j                            |           | j                            |           dS )z{Handle a declaration of unknown type -- probably a CDATA block.

        :param data: The text of the declaration.
        zCDATA[N)upperrf   r   r|   r   r.   ry   r_   )r9   r]   clss      r:   unknown_declz$BeautifulSoupHTMLParser.unknown_decl0  s     ::<<""8,, 	CH(DDC		d###	#r;   c                    | j                                          | j                             |           |                     |           | j                             t                     dS )z\Handle a processing instruction.

        :param data: The text of the instruction.
        N)r.   ry   r_   _document_might_be_xmlr   r`   s     r:   	handle_piz!BeautifulSoupHTMLParser.handle_pi?  s_    
 			d#####D)))	/00000r;   N)r.   r    r/   r   r-   r0   r1   r   )r<   r(   r=   r>   )rA   r(   rB   rC   r=   r>   )T)rA   r(   rB   rC   rE   rI   r=   r>   )rA   r(   rN   rI   r=   r>   )r]   r(   r=   r>   )rA   r(   r=   r>   )__name__
__module____qualname__r)   __annotations__r+   r6   r@   rH   rF   rG   r_   rr   rw   rz   r}   r   r    r;   r:   r&   r&   =   sz         G F$ JQ	( ( ( ( ( (. CBBB++++, , , , ! ! ! !& &*	<- <- <- <- <-|* * * * *$$ $ $ $& & & &P   &# # # ## # # #   1 1 1 1 1 1r;   r&   c                       e Zd ZU dZdZded<   dZded<   eZded<   ee	e
gZd	ed
<   ded<   dZded<   	 	 d!d" fdZ	 	 	 d#d$dZd%d Z xZS )&r   zA Beautiful soup `bs4.builder.TreeBuilder` that uses the
    :py:class:`html.parser.HTMLParser` parser, found in the Python
    standard library.

    FrI   is_xmlT	picklabler(   NAMEzIterable[str]featuresz$Tuple[Iterable[Any], Dict[str, Any]]parser_argsTRACKS_LINE_NUMBERSNOptional[Iterable[Any]]parser_kwargsOptional[Dict[str, Any]]r1   r   c                    t                      }dD ] }||v r|                    |          }|||<   ! t          t          |           j        di | |pg }|pi }|                    |           d|d<   ||f| _        dS )a  Constructor.

        :param parser_args: Positional arguments to pass into
            the BeautifulSoupHTMLParser constructor, once it's
            invoked.
        :param parser_kwargs: Keyword arguments to pass into
            the BeautifulSoupHTMLParser constructor, once it's
            invoked.
        :param kwargs: Keyword arguments for the superclass constructor.
        r,   Fconvert_charrefsNr   )dictpopsuperr   r6   updater   )r9   r   r   r1   extra_parser_kwargsargrX   	__class__s          r:   r6   zHTMLParserTreeBuilder.__init__[  s    $ #ff. 	1 	1Cf}}

3+0#C(3#T**3==f===!'R%+0111,1()'7r;   markupr$   user_specified_encodingOptional[_Encoding]document_declared_encodingexclude_encodingsOptional[_Encodings]r=   DIterable[Tuple[str, Optional[_Encoding], Optional[_Encoding], bool]]c              #  8  K   t          |t                    r
|dddfV  dS g }|r|                    |           g }|r|                    |           t          |||d|          }|j        t          d          |j        |j        |j        |j        fV  dS )a2  Run any preliminary steps necessary to make incoming markup
        acceptable to the parser.

        :param markup: Some markup -- probably a bytestring.
        :param user_specified_encoding: The user asked to try this encoding.
        :param document_declared_encoding: The markup itself claims to be
            in this encoding.
        :param exclude_encodings: The user asked _not_ to try any of
            these encodings.

        :yield: A series of 4-tuples: (markup, encoding, declared encoding,
             has undergone character replacement)

            Each 4-tuple represents a strategy for parsing the document.
            This TreeBuilder uses Unicode, Dammit to convert the markup
            into Unicode, so the ``markup`` element of the tuple will
            always be a string.
        NFT)known_definite_encodingsuser_encodingsis_htmlr   zPCould not convert input to Unicode, and html.parser will not accept bytestrings.)	
isinstancer(   rS   r   unicode_markupr   ri   declared_html_encodingcontains_replacement_characters)r9   r   r   r   r   r   r   dammits           r:   prepare_markupz$HTMLParserTreeBuilder.prepare_markupy  s      2 fc"" 	4u----F 57 " 	E
 %++,CDDD*,% 	> !!"<===%=)/
 
 
  ( 'b  
 %(-6	     r;   r>   c                *   | j         \  }}t          |t                    sJ | j        J t	          | j        g|R i |}	 |                    |           |                                 n!# t          $ r}t          |          d }~ww xY wg |_	        d S r3   )
r   r   r(   r.   r&   feedcloseAssertionErrorr   r7   )r9   r   r/   r1   parseres         r:   r   zHTMLParserTreeBuilder.feed  s    'f &#&&&&&
 y$$$(DTDDDVDD	*KKLLNNNN 	* 	* 	* 'q)))		*
 /1+++s   )A+ +
B	5BB	)NN)r   r   r   r   r1   r   )NNN)
r   r$   r   r   r   r   r   r   r=   r   )r   r$   r=   r>   )r   r   r   __doc__r   r   r   
HTMLPARSERr   r   r   r   r   r6   r   r   __classcell__)r   s   @r:   r   r   J  s          FID#T62H22225555 !%$$$$ 04268 8 8 8 8 8 8B 8<:>26F F F F FP1 1 1 1 1 1 1 1r;   )0r   
__future__r   __license____all__html.parserr   typingr   r   r	   r
   r   r   r   r   r   r   r   bs4.elementr   r   r   r   r   r   
bs4.dammitr   r   bs4.builderr   r   r   r   bs4.exceptionsr   bs4r    r!   bs4._typingr"   r#   r$   r   r(   rO   r&   r   r   r;   r:   <module>r      sx   I I " " " " " "   # " " " " "                                         9 8 8 8 8 8 8 8            0 / / / / / !!!!!!++++++          
%tCH~sC&@$&FG J1 J1 J1 J1 J1j*@ J1 J1 J1ZP1 P1 P1 P1 P1O P1 P1 P1 P1 P1r;   