
    Mh>Y                        d Z dgZddlmZmZmZmZmZmZm	Z	m
Z
mZ ddlmZ ddlmZmZmZmZmZmZ ddlZddlmZmZmZmZmZ ddlmZmZmZ ddl Z dd	l!m"Z" dd
lm#Z#m$Z$m%Z%m&Z& e	rddl'm(Z( ddl)m*Z+  G d de          Z, G d de+j-                  Z. G d de/          Z0 G d de+j1                  Z2 G d de2          Z3 G d de2          Z4dS )MITHTML5TreeBuilder    )	AnycastDictIterableOptionalSequenceTYPE_CHECKINGTupleUnion)	TypeAlias)_AttributeValue_AttributeValues	_Encoding
_Encodings_NamespaceURL
_RawMarkupN)DetectsXMLParsedAsHTML
PERMISSIVEHTMLHTML_5HTMLTreeBuilder)NamespacedAttributePageElementnonwhitespace_re)
namespaces)CommentDoctypeNavigableStringTagBeautifulSoup)basec                   "   e Zd ZU dZdZeed<   eeee	gZ
ee         ed<   dZeed<   ded<   ee         ed	<   	 	 	 dded	ee         dee         dee         deeeee         ee         ef                  f
dZdedd
fdZdeddfdZdedefdZd
S )r   aj  Use `html5lib <https://github.com/html5lib/html5lib-python>`_ to
    build a tree.

    Note that `HTML5TreeBuilder` does not support some common HTML
    `TreeBuilder` features. Some of these features could theoretically
    be implemented, but at the very least it's quite difficult,
    because html5lib moves the parse tree around as it's being built.

    Specifically:

    * This `TreeBuilder` doesn't use different subclasses of
      `NavigableString` (e.g. `Script`) based on the name of the tag
      in which the string was found.
    * You can't use a `SoupStrainer` to parse only part of a document.
    html5libNAMEfeaturesTTRACKS_LINE_NUMBERSTreeBuilderForHtml5libunderlying_builderuser_specified_encodingNmarkupdocument_declared_encodingexclude_encodingsreturnc              #      K   || _         |df|dffD ]$\  }}|rt          j        d| d| dd           %t          j        |d           |d d dfV  d S )	Nr.   r/   zYou provided a value for z0, but the html5lib tree builder doesn't support .   
stacklevelF)r,   warningswarnr   warn_if_markup_looks_like_xml)selfr-   r,   r.   r/   variablenames          U/var/www/html/test/jupyter/venv/lib/python3.11/site-packages/bs4/builder/_html5lib.pyprepare_markupzHTML5TreeBuilder.prepare_markupW   s       (?$ ()EF 34
 	 	NHd  mmmfjmmm     	<VPQRRRRtT5))))))    c                    | j         "| j         j        t          j        dd           t	          j        | j                  }| j        J || j        _        t                      }t          |t                    s
| j        |d<    |j        |fi |}t          |t                    rd|_        n%|j        j        j        d         }|j        }||_        d| j        _        dS )zRun some incoming markup through some parsing process,
        populating the `BeautifulSoup` object in `HTML5TreeBuilder.soup`.
        NzYou provided a value for parse_only, but the html5lib tree builder doesn't support parse_only. The entire document will be parsed.   r4   )treeoverride_encodingr   )soup
parse_onlyr6   r7   r&   
HTMLParsercreate_treebuilderr+   parserdict
isinstancestrr,   parseoriginal_encoding	tokenizerstreamcharEncodingr;   )r9   r-   rG   extra_kwargsdocrL   s         r<   feedzHTML5TreeBuilder.feedu   s    9 TY%9%EM U    $$*ABBB&222)/&vv&#&& 	M 150LL,-fl622\22 fc"" 		6 %)C!! & 0 7 DQ G !2 6$5C!)-&&&r>   namespaceHTMLElementsc                 R    t          || j        | j                  | _        | j        S )zCalled by html5lib to instantiate the kind of class it
        calls a 'TreeBuilder'.

        :param namespaceHTMLElements: Whether or not to namespace HTML elements.

        :meta private:
        )store_line_numbers)r*   rC   rU   r+   )r9   rS   s     r<   rF   z#HTML5TreeBuilder.create_treebuilder   s2     #9!49AX#
 #
 #
 &&r>   fragmentc                     d|z  S )zSee `TreeBuilder`.z)<html><head></head><body>%s</body></html> )r9   rV   s     r<   test_fragment_to_documentz*HTML5TreeBuilder.test_fragment_to_document   s    :XEEr>   )NNN)__name__
__module____qualname____doc__r'   rJ   __annotations__r   r   r   r(   r
   r)   boolr	   r   r   r   r   r   r=   rR   rF   rY   rX   r>   r<   r   r   ;   sp           D##Z>Hhsm>>> !%$$$0000%i0000
 8<:>26* ** "*)!4* %-Y$7	*
 $J/* 
%
HY$7)9LdRS	T* * * *<#.: #.$ #. #. #. #.J'%)'	!' ' ' 'F# F# F F F F F Fr>   c            	            e Zd ZU ded<   eej                 ed<   	 	 ddeded         dedef fd	Z	ddZ
deeef         d
dfdZdeded
dfdZded
dfdZddZddZddZddZddd
efdZ xZS ) r*   r#   rC   rG   NTrS   rU   kwargsc                     |r|| _         n2t          j        dt          d           ddlm}  |	 d	d|i|| _         t          t          |                               |           d | _	        || _
        d S )
NaK  The optionality of the 'soup' argument to the TreeBuilderForHtml5lib constructor is deprecated as of Beautiful Soup 4.13.0: 'soup' is now required. If you can't pass in a BeautifulSoup object here, or you get this warning and it seems mysterious to you, please contact the Beautiful Soup developer team for possible un-deprecation.   r4   r   r"    html.parserrU   )rd   re   )rC   r6   r7   DeprecationWarningbs4r#   superr*   __init__rG   rU   )r9   rS   rC   rU   ra   r#   	__class__s         r<   ri   zTreeBuilderForHtml5lib.__init__   s      	DIIM ^"   
 *)))))
 &! 6HLR DI 	$d++445JKKK "4r>   r0   Elementc                 j    | j                                          t          | j         | j         d           S N)rC   resetrk   r9   s    r<   documentClassz$TreeBuilderForHtml5lib.documentClass   s*    	ty$)T222r>   tokenc                 4   t          t          |d                   }t          t          t                   |d                   }t          t          t                   |d                   }t          j        |||          }| j                            |           d S )Nr;   publicIdsystemId)r   rJ   r	   r   for_name_and_idsrC   object_was_parsed)r9   rq   r;   rs   rt   doctypes         r<   insertDoctypez$TreeBuilderForHtml5lib.insertDoctype   su    eFm,,"&x}eJ6G"H"H"&x}eJ6G"H"H*48DD	##G,,,,,r>   r;   	namespacec                     d }d }| j         6| j        r/| j         j        j                                        \  }}|J |dz
  }| j                            ||||          }t          || j        |          S )N   )
sourceline	sourcepos)rG   rU   rM   rN   positionrC   new_tagrk   )r9   r;   ry   r|   r}   tags         r<   elementClassz#TreeBuilderForHtml5lib.elementClass   s    $(
#'	;"t'>" %)K$9$@$I$I$K$K!J	(((!AIi)
i   
 
 sDIy111r>   dataTextNodec                 F    t          t          |          | j                  S rm   )r   r   rC   )r9   r   s     r<   commentClassz#TreeBuilderForHtml5lib.commentClass   s    ty111r>   c                     t                      )zThis is only used by html5lib HTMLParser.parseFragment(),
        which is never used by Beautiful Soup, only by the html5lib
        unit tests. Since we don't currently hook into those tests,
        the implementation is left blank.
        NotImplementedErrorro   s    r<   fragmentClassz$TreeBuilderForHtml5lib.fragmentClass   s     "###r>   c                     t                      zThis is only used by the html5lib unit tests. Since we
        don't currently hook into those tests, the implementation is
        left blank.
        r   ro   s    r<   getFragmentz"TreeBuilderForHtml5lib.getFragment       
 "###r>   nodec                 D    | j                             |j                   d S rm   )rC   appendelementr9   r   s     r<   appendChildz"TreeBuilderForHtml5lib.appendChild  s"     		&&&&&r>   c                     | j         S rm   )rC   ro   s    r<   getDocumentz"TreeBuilderForHtml5lib.getDocument
  s
    yr>   r   c                     t                      r   r   r9   r   s     r<   testSerializerz%TreeBuilderForHtml5lib.testSerializer  r   r>   )NT)r0   rk   r   rk   r0   N)r0   r#   )rZ   r[   r\   r^   r	   r&   rE   r_   r   ri   rp   r   rJ   rx   r   r   r   r   r   r   r   __classcell__)rj   s   @r<   r*   r*      s        
X())))
 +/#'	5 5#5 '5 !	5
 5 5 5 5 5 5B3 3 3 3-4S> -d - - - -2 2 2 2 2 2 2 2 2 2 2 2 2$ $ $ $$ $ $ $' ' ' '   $i $C $ $ $ $ $ $ $ $r>   r*   c                       e Zd ZU dZeed<   eed<   defdZdee	e
ef                  fdZde
dedd	fd
Zdee	e
ef                  fdZdee
         fdZdefdZde
defdZde
defdZd	S )AttrListz@Represents a Tag's attributes in a way compatible with html5lib.r   attrsc                 P    || _         t          | j         j                  | _        d S rm   )r   rH   r   r   s     r<   ri   zAttrList.__init__  s!    $,,--


r>   r0   c                 r    t          | j                                                                                  S rm   )listr   items__iter__ro   s    r<   r   zAttrList.__iter__  s*    DJ$$&&''00222r>   r;   valueNc                 v   | j         j        pi }||                    dg           v s0| j         j        |v rz||                    | j         j        g           v rXt	          |t
                    sCt	          |t                    sJ | j                             t          j	        |                    }|| j         |<   d S )N*)
r   cdata_list_attributesgetr;   rI   r   rJ   attribute_value_list_classr   findall)r9   r;   r   	list_attrs       r<   __setitem__zAttrList.__setitem__"  s     L6<"	9==b))))L**	dl&7<<<< eT** !%-----??$,U33  #Tr>   c                 N    t          | j                                                  S rm   )r   r   r   ro   s    r<   r   zAttrList.items3  s    DJ$$&&'''r>   c                 N    t          | j                                                  S rm   r   r   keysro   s    r<   r   zAttrList.keys6  s    DJOO%%&&&r>   c                 *    t          | j                  S rm   )lenr   ro   s    r<   __len__zAttrList.__len__9  s    4:r>   c                     | j         |         S rm   )r   r9   r;   s     r<   __getitem__zAttrList.__getitem__<  s    z$r>   c                 R    |t          | j                                                  v S rm   r   r   s     r<   __contains__zAttrList.__contains__?  s!    tDJOO--....r>   )rZ   r[   r\   r]   r!   r^   r   ri   r   r   rJ   r   r   r   r   r   intr   r   r_   r   rX   r>   r<   r   r     sG        JJLLL. . . . .3(5o)=#>? 3 3 3 3# #O # # # # #"(xc?&: ;< ( ( ( ('hsm ' ' ' '               / / / / / / / /r>   r   c                   l    e Zd ZU eed<   ded<   ee         ed<   edefd            Z	de
j        fdZdS )	BeautifulSoupNoder   r#   rC   ry   r0   c                     t                      )zReturn the html5lib constant corresponding to the type of
        the underlying DOM object.

        NOTE: This property is only accessed by the html5lib test
        suite, not by Beautiful Soup proper.
        r   ro   s    r<   nodeTypezBeautifulSoupNode.nodeTypeH  s     "###r>   c                     t                      rm   r   ro   s    r<   	cloneNodezBeautifulSoupNode.cloneNodeT  s    !###r>   N)rZ   r[   r\   r   r^   r	   r   propertyr   r   treebuilder_baseNoder   rX   r>   r<   r   r   C  s         
&&&&$# $ $ $ X$$+0 $ $ $ $ $ $r>   r   c                      e Zd ZU eed<   ee         ed<   dedddee         fdZdd
Zde	fdZ
eeeeef         f         Zeed<   eeef         Zeed<   dee         dd	fdZ ee
e          Z	 ddeded         dd	fdZ	 	 	 	 	 	 ddZddZddZdefdZdej        fdZdeee         ef         fdZ ee          Zd	S ) rk   r   ry   rC   r#   c                 z    t           j                            | |j                   || _        || _        || _        d S rm   )r   r   ri   r;   r   rC   ry   )r9   r   rC   ry   s       r<   ri   zElement.__init__\  s8     	&&tW\:::	"r>   r   r   r0   Nc                 4   d }t          |j                  t          u r
|j        x}}n|j        }| |_        |5|j        .t	          |t
                    s|j                                         || j        j        rxt          | j        j        d                   t          u rR| j        j        d         }| j        	                    ||z             }|
                    |           || j        _        d S t	          |t
                    r| j        	                    |          }| j        j        r| j                            d          }n-| j        j        | j                                        }n| j        }| j                            || j        |           d S )NF)parentmost_recent_element)typer   r    r   rI   rJ   extractcontentsrC   
new_stringreplace_with_most_recent_element_last_descendantnext_elementrv   )r9   r   string_childchildold_elementnew_elementr   s          r<   r   zElement.appendChildd  s   2600#'</L55LE (uc** ) L  """ $% %T\*2.//?BB
 ,/3K)..{\/IJJK$$[111-8DI***$$$ 3	,,T22
 |$ 	3&*l&C&CE&J&J##*6
 '+i&@&@&B&B##&*l#I''dl@S (     r>   c                 b    t          | j        t                    ri S t          | j                  S rm   )rI   r   r   r   ro   s    r<   getAttributeszElement.getAttributes  s+    dlG,, 	I%%%r>   _Html5libAttributeName_Html5libAttributes
attributesc                    |t          |          dk    rt          |                                          D ]+\  }}t          |t                    rt          | }||= |||<   ,t          t          |          }| j        j	        
                    | j        |           t          |                                          D ]\  }}|| j        |<   | j        j	                            | j                   d S d S d S Nr   )r   r   r   rI   tupler   r   r   rC   builder$_replace_cdata_list_attribute_valuesr;   r   set_up_substitutions)r9   r   r;   r   new_namenormalized_attributesvalue_or_valuess          r<   setAttributeszElement.setAttributes  s   !c*oo&9&9  $J$4$4$6$677 1 1edE** 12D9H"4(+0Jx( %))9:$F$F! IBB	0   *..C.I.I.K.K)L)L 5 5%o%4T"" I224<@@@@@; "!&9&9r>   r   insertBeforec                     t          | j                            |          | j                  }|r|                     ||           d S |                     |           d S rm   )r   rC   r   r   r   )r9   r   r   texts       r<   
insertTextzElement.insertText  sc     	,,T22DI>> 	#dL11111T"""""r>   refNodec                    | j                             |j                   }t          |j                   t          u r| j         j        rt          | j         j        |dz
                     t          u rf| j         j        |dz
           }t          |          t          u sJ | j                            ||j         z             }|                    |           d S | j                             ||j                    | |_	        d S )Nr{   )
r   indexr   r    r   rC   r   r   insertr   )r9   r   r   r   old_nodenew_strs         r<   r   zElement.insertBefore  s     ""7?33/11% 2T\*519566/II |,UQY7H>>_4444i**8dl+BCCG!!'*****Lt|444DKKKr>   c                 8    |j                                          d S rm   )r   r   r   s     r<   removeChildzElement.removeChild  s    r>   
new_parentc                 P   | j         }|j         }|j        }|                    dd          }t          |j                  dk    r|J |j        d         }|j        }n	d}|j        }|j        }t          |          dk    rr|d         }	|||	_        n||	_        ||	_        ||	|_        n|	|_        ||	|_        |d                             dd          }
|
J ||
_        ||
|_        d|
_        |D ]#}||_        |j        	                    |           $g |_        ||_        dS )z1Move all of this tag's children into another tag.Fr   Nr   T)is_initializedaccept_self)
r   next_siblingr   r   r   r   previous_elementprevious_siblingr   r   )r9   r   r   new_parent_elementfinal_next_elementnew_parents_last_descendantnew_parents_last_child(new_parents_last_descendant_next_element	to_appendfirst_childlast_childs_last_descendantr   s               r<   reparentChildrenzElement.reparentChildren  s    ,'/ %1&8&I&I%QV&W&W#!*++a// /:::%7%@%D"+8 54
 &*"7I7V4$	y>>A $A,K*6/J,,/A,+AK(*6;F+882="/%16A&3 +4B-*H*H$$ +I + +' /:::8 (4 8C 0 9I 8<'4 	6 	6E-EL'..u5555 1r>   c                 <    t          | j        j                  dk    S r   )r   r   r   ro   s    r<   
hasContentzElement.hasContent7  s    4<())A--r>   c                     | j                             | j        j        | j                  }t          || j         | j                  }| j        D ]\  }}||j        |<   |S rm   )rC   r   r   r;   ry   rk   r   )r9   r   r   keyr   s        r<   r   zElement.cloneNode<  s^    i 14>BBsDIt~66/ 	) 	)JC#(DOC  r>   c                 T    | j         t          d         | j        fS | j         | j        fS )Nhtml)ry   r   r;   ro   s    r<   getNameTuplezElement.getNameTupleC  s+    >!f%ty00>49,,r>   )r   r   r0   Nrm   )r   r   r   r   r0   Nr   )r   rk   r0   N) rZ   r[   r\   r!   r^   r	   r   ri   r   r   r   r   rJ   r   r   r   r   r   r   r   r   r   r   r   r   r_   r   r   r   r   r  	nameTuplerX   r>   r<   rk   rk   X  s        LLL&&&&##"1#>F}>U# # # #1 1 1 1f&x & & & & ).c5c?.B(CICCC &**@#*E%FFFFA1D(E A$ A A A A@ -77J HL# ##'/0C'D#	# # # #'2E	   $   L2 L2 L2 L2h.D . . . .
+0    -eH]$;S$@A - - - - &&IIIr>   rk   c                   *    e Zd ZU eed<   deddfdZdS )r   r   rC   r#   c                 b    t           j                            | d            || _        || _        d S rm   )r   r   ri   r   rC   )r9   r   rC   s      r<   ri   zTextNode.__init__O  s-    &&tT222			r>   N)rZ   r[   r\   r    r^   ri   rX   r>   r<   r   r   L  sD                r>   r   )5__license____all__typingr   r   r   r   r	   r
   r   r   r   typing_extensionsr   bs4._typingr   r   r   r   r   r   r6   bs4.builderr   r   r   r   r   bs4.elementr   r   r   r&   html5lib.constantsr   r   r   r    r!   rg   r#   html5lib.treebuildersr$   r   r   TreeBuilderr*   objectr   r   r   rk   r   rX   r>   r<   <module>r     s2   
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 ( ' ' ' ' '                                      
                   "!!!!!! : : : : : :pF pF pF pF pF pF pF pFfd$ d$ d$ d$ d$-9 d$ d$ d$N+/ +/ +/ +/ +/v +/ +/ +/\$ $ $ $ $(- $ $ $*q' q' q' q' q' q' q' q'h          r>   