
    Mh                     ,   U d Z dZddlZddlZddlZddlZddlZddlmZ ddl	m
Z
mZmZmZmZmZmZmZmZmZ ddlmZ ddlmZ ddlmZ dd	lmZ dd
lmZ ddlmZmZm Z m!Z!m"Z" 	 ddl#m$Z$ dZ%n# e&$ r dZ%Y nw xY wej'        (                    d          duZ)	 ddl*Z+dZ,e+j-        j.        Z.n# e&$ r dZ,dZ.Y nw xY weZ/e"e         e0d<   dZ1e2e0d<    G d de3          Z4 G d de4          Z5 G d de5          Z6 G d de5          Z7 G d de6          Z8dS )zHelper classes for tests.MIT    N)BeautifulSoup)
AttributeValueListCharsetMetaAttributeValueCommentContentMetaAttributeValueDoctypePageElementPYTHON_SPECIFIC_ENCODINGSScript
StylesheetTag)SoupStrainer)XMLParsedAsHTMLWarning)_IncomingMarkup)TreeBuilder)HTMLParserTreeBuilder)AnyIterableOptionalTupleType)SelectorSyntaxErrorTFhtml5lib)r   default_builderu-
  A bare string
<!DOCTYPE xsl:stylesheet SYSTEM "htmlent.dtd">
<!DOCTYPE xsl:stylesheet PUBLIC "htmlent.dtd">
<div><![CDATA[A CDATA section where it doesn't belong]]></div>
<div><svg><![CDATA[HTML5 does allow CDATA sections in SVG]]></svg></div>
<div>A <meta> tag</div>
<div>A <br> tag that supposedly has contents.</br></div>
<div>AT&T</div>
<div><textarea>Within a textarea, markup like <b> tags and <&<&amp; should be treated as literal</textarea></div>
<div><script>if (i < 2) { alert("<b>Markup within script tags should be treated as literal.</b>"); }</script></div>
<div>This numeric entity is missing the final semicolon: <x t="pi&#241ata"></div>
<div><a href="http://example.com/</a> that attribute value never got closed</div>
<div><a href="foo</a>, </a><a href="bar">that attribute value was closed by the subsequent tag</a></div>
<! This document starts with a bogus declaration ><div>a</div>
<div>This document contains <!an incomplete declaration <div>(do you see it?)</div>
<div>This document ends with <!an incomplete declaration
<div><a style={height:21px;}>That attribute value was bogus</a></div>
<! DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN">The doctype is invalid because it contains extra whitespace
<div><table><td nowrap>That boolean attribute had no value</td></table></div>
<div>Here's a nonexistent entity: &#foo; (do you see it?)</div>
<div>This document ends before the entity finishes: &gt
<div><p>Paragraphs shouldn't contain block display elements, but this one does: <dl><dt>you see?</dt></p>
<b b="20" a="1" b="10" a="2" a="3" a="4">Multiple values for the same attribute.</b>
<div><table><tr><td>Here's a table</td></tr></table></div>
<div><table id="1"><tr><td>Here's a nested table:<table id="2"><tr><td>foo</td></tr></table></td></div>
<div>This tag contains nothing but whitespace: <b>    </b></div>
<div><blockquote><p><b>This p tag is cut off by</blockquote></p>the end of the blockquote tag</div>
<div><table><div>This table contains bare markup</div></table></div>
<div><div id="1">
 <a href="link1">This link is never closed.
</div>
<div id="2">
 <div id="3">
   <a href="link2">This link is closed.</a>
  </div>
</div></div>
<div>This document contains a <!DOCTYPE surprise>surprise doctype</div>
<div><a><B><Cd><EFG>Mixed case tags are folded to lowercase</efg></CD></b></A></div>
<div><our☃>Tag name contains Unicode characters</our☃></div>
<div><a ☃="snowman">Attribute name contains Unicode characters</a></div>
<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">
BAD_DOCUMENTc                      e Zd Zedee         fd            Zdedede	fdZ
dededefdZ	 dded	ee         ddfd
ZeZdeddfdZ	 ddededee         fdZdee         dee         ddfdZdee         dee         ddfdZdS )SoupTestreturnc                     t           S N)r   selfs    R/var/www/html/test/jupyter/venv/lib/python3.11/site-packages/bs4/tests/__init__.pyr   zSoupTest.default_builderf   s        markupkwargsc                 V    |                     d| j                  }t          |fd|i|S )z*Build a Beautiful Soup object from markup.builder)popr   r   )r#   r&   r'   r)   s       r$   soupzSoupTest.soupj   s3    **Y(<==V??W????r%   c                 B     | j         di |                    |          S )z[Turn an HTML fragment into a document.

        The details depend on the builder.
         )r   test_fragment_to_document)r#   r&   r'   s      r$   document_forzSoupTest.document_foro   s+    
 $t#--f--GGOOOr%   Nto_parsecompare_parsed_toc                 ~   | j         }t          ||          }|t          |t                    sJ |}|                                |                     |          k    sJ t          d t          |j        	                                          D                       sJ |j
        gd |j        D             k    sJ dS )ziParse some markup using Beautiful Soup and verify that
        the output markup is as expected.
        r)   Nc              3   "   K   | ]
}|d k    V  dS )r   Nr-   ).0vs     r$   	<genexpr>z'SoupTest.assert_soup.<locals>.<genexpr>   s&      GGa16GGGGGGr%   c                     g | ]	}|j         
S r-   namer5   xs     r$   
<listcomp>z(SoupTest.assert_soup.<locals>.<listcomp>   s    &D&D&D!qv&D&D&Dr%   )r   r   
isinstancestrdecoder/   alllistopen_tag_countervaluesROOT_TAG_NAMEtagStack)r#   r0   r1   r)   objs        r$   assert_soupzSoupTest.assert_soupv   s     &Hg666$h,,,,, ( zz||t001BCCCCCC GG4(<(C(C(E(E#F#FGGGGGGGG !"&D&Ds|&D&D&DDDDDDDr%   elementc                 Z    d}|j         D ] }|r||j        k    sJ ||j        k    sJ |}!dS )zyEnsure that next_element and previous_element are properly
        set for all descendants of the given element.
        N)descendantsnext_elementprevious_element)r#   rI   earlieres       r$   assertConnectednesszSoupTest.assertConnectedness   s\     $ 	 	A 5G00000!"44444GG		 	r%   Fel_recursive_callc           	         d}|j         |j        $J d                    ||j        d                      |j        $J d                    ||j        d                      |j        $J d                    ||j        d                      d}d}d}t          |j                  dz
  }|j        D ]>}d}|dk    r|j         |j        |u s$J d                    ||j        |                      |j        |u s$J d                    ||j        |                      |j        $J d                    ||j        d                      n|j        |j        |dz
           u s2J d	                    ||j        |j        |dz
                                 |j        |dz
           j        |u s@J d
                    |j        |dz
           |j        |dz
           j        |                      |e|j        |u s/J d                    ||j        ||j         j                              |j        |u s$J d                    ||j        |                      t          |t                    rL|j        rE| 
                    |d          }|J |j        $J d
                    ||j        d                      ||}n|}||k    r+|j        $J d
                    ||j        d                      |dz  }@||n|}||}|s||z|}	 |,|j        $J d                    ||j        d                      nG|j        8|j        |j        u s)J d                    ||j        |j                              n|j         }udS |S )z.Ensure proper linkage throughout the document.Nz3Bad previous_element
NODE: {}
PREV: {}
EXPECTED: {}z3Bad previous_sibling
NODE: {}
PREV: {}
EXPECTED: {}z/Bad next_sibling
NODE: {}
NEXT: {}
EXPECTED: {}r      z/Bad next_element
NODE: {}
NEXT: {}
EXPECTED: {}z2Bad previous_sibling
NODE: {}
PREV {}
EXPECTED: {}z1Bad previous_sibling
NODE: {}
PREV {}
EXPECTED {}z-Bad next_sibling
NODE: {}
NEXT {}
EXPECTED {}z=Bad previous_element
NODE: {}
PREV {}
EXPECTED {}
CONTENTS {}z-Bad next_element
NODE: {}
NEXT {}
EXPECTED {}T)parentrM   formatprevious_siblingnext_siblinglencontentsrL   r>   r   linkage_validator)	r#   rQ   rR   
descendantidxchild
last_childlast_idxtargets	            r$   r[   zSoupTest.linkage_validator   s    
 9#++GNNB'  ,++
 #++GNNB'  ,++
 ''CJJBOT  (''
 
r{##a'[ H	 H	EJ axx9(5000KRRBOU  100
 ."444OVVu5r  544
 .66NUUu5t  766 *bk#'.BBBBIPP512;sQw3G  CBB
 Ka(5>>>ELLKa("+cAg*>*KU  ?>>
 ).*<<<Zaau5z5<CX  =<<
 #/5888IPP"J$;U  988
 %%% %. !33E4@@
!--- +33ELL
 7  433 %'

"
 h&..ELL5-t  /.. 1HCC(4

%=E 	5#4$&F'>*22IPPu14  322 (4*f.AAAAIPPu163F  BAA '$ 4 Lr%   tagsshould_matchc                 *    d |D             |k    sJ dS )zMake sure that the given tags have the correct text.

        This is used in tests that define a bunch of tags, each
        containing a single string, and then select certain strings by
        some mechanism.
        c                     g | ]	}|j         
S r-   stringr5   tags     r$   r=   z+SoupTest.assert_selects.<locals>.<listcomp>'  s    +++s
+++r%   Nr-   r#   rb   rc   s      r$   assert_selectszSoupTest.assert_selects   s)     ,+d+++|;;;;;;r%   c                 *    d |D             |k    sJ dS )zMake sure that the given tags have the correct IDs.

        This is used in tests that define a bunch of tags, each
        containing a single string, and then select certain strings by
        some mechanism.
        c                     g | ]
}|d          S idr-   rh   s     r$   r=   z/SoupTest.assert_selects_ids.<locals>.<listcomp>2  s    ***cD	***r%   Nr-   rj   s      r$   assert_selects_idszSoupTest.assert_selects_ids)  s)     +*T***l::::::r%   r!   )F)__name__
__module____qualname__propertyr   r   r   r   r   r   r+   r?   r/   r   rH   assertSoupEqualsr   rP   boolr
   r[   r   rk   rp   r-   r%   r$   r   r   e   s       k!2    X@? @c @m @ @ @ @
P3 P# P# P P P P MQE E'E<DSME	E E E E4 #	3 	4 	 	 	 	 05A AA(,A	+	A A A AF<8C= < <RV < < < <	;SM	;19#	;		; 	; 	; 	; 	; 	;r%   r   c                       e Zd Zej                            ddi  edg          ddgig          d             Zej                            d edg          ddgig          d	             Zd
 Z	d Z
d Zd ZdS )TreeBuilderSmokeTestmulti_valued_attributesNclass)b*notclassc                 ^    d}|                      ||          }|j        d         dk    sJ d S )NzC<html xmlns="http://www.w3.org/1999/xhtml"><a class="a b c"></html>ry   rz   za b cr+   ar#   ry   r&   r+   s       r$   test_attribute_not_multi_valuedz4TreeBuilderSmokeTest.test_attribute_not_multi_valued8  s=     Wyy9PyQQvg'))))))r%   )r   c                 b    d}|                      ||          }|j        d         g dk    sJ d S )Nz<a class="a b c">r   rz   )r   r{   cr   r   s       r$   test_attribute_multi_valuedz0TreeBuilderSmokeTest.test_attribute_multi_valued@  s@     %yy9PyQQvg///111111r%   c                 b    d}|                      |           d}|                      |           d S )Nz<![if word]>content<![endif]>z<!DOCTYPE html]ff>)r+   r#   r&   s     r$   test_invalid_doctypez)TreeBuilderSmokeTest.test_invalid_doctypeH  s8     1		&%		&r%   c                     d}|                      |t          d                    }t          d |j        D                       rJ d S )Nz<!DOCTYPE html>
<html>
</html>htmlr9   
parse_onlyc              3   @   K   | ]}t          |t                    V  d S r!   )r>   r	   r;   s     r$   r7   z=TreeBuilderSmokeTest.test_doctype_filtered.<locals>.<genexpr>S  s,      HH!z!W--HHHHHHr%   )r+   r   anyrK   r#   r&   r+   s      r$   test_doctype_filteredz*TreeBuilderSmokeTest.test_doctype_filteredP  sS    3yyLf,E,E,EyFFHHt7GHHHHHHHHHHr%   c                 @    G d dt                     }d}|                     |          }|                     ||          }|j        }t	          |j        |          sJ d|d         k    sJ d|d	<   d|d	         k    sJ d
}||                                k    sJ d S )Nc                   (     e Zd Zdedef fdZ xZS )NTreeBuilderSmokeTest.test_custom_attribute_dict_class.<locals>.MyAttributeDictkeyvaluec                 L    t                                          |d           d S )N
OVERRIDDEN)super__setitem__)r#   r   r   	__class__s      r$   r   zZTreeBuilderSmokeTest.test_custom_attribute_dict_class.<locals>.MyAttributeDict.__setitem__W  s%     ##C66666r%   )rq   rr   rs   r?   r   r   __classcell__r   s   @r$   MyAttributeDictr   V  sK        7s 73 7 7 7 7 7 7 7 7 7 7r%   r   "<a attr1="val1" attr2="val2">f</a>)attribute_dict_classr3   r   attr1Tattr3zA<a attr1="OVERRIDDEN" attr2="OVERRIDDEN" attr3="OVERRIDDEN">f</a>)dictr   r+   r   r>   attrsr@   )r#   r   r&   r)   r+   ri   expects          r$    test_custom_attribute_dict_classz5TreeBuilderSmokeTest.test_custom_attribute_dict_classU  s    	7 	7 	7 	7 	7d 	7 	7 	7 6&&O&LLyyy11f#)_55555s7|++++Gs7|++++T%%%%%%r%   c                 &    G d dt                     }|                     dt          dg          i|          }d}|                     ||          }|j        }|d         d	k    sJ |d         d
dgk    sJ t          |d         |          sJ d S )Nc                        e Zd Z fdZ xZS )_TreeBuilderSmokeTest.test_custom_attribute_value_list_class.<locals>.MyCustomAttributeValueListc                 d     t                      j        |i | |                     d           d S )Nextra)r   __init__append)r#   argsr'   r   s      r$   r   zhTreeBuilderSmokeTest.test_custom_attribute_value_list_class.<locals>.MyCustomAttributeValueList.__init__j  s7      $1&111G$$$$$r%   )rq   rr   rs   r   r   r   s   @r$   MyCustomAttributeValueListr   i  s8        % % % % % % % % %r%   r   r|   attr2)ry   attribute_value_list_classr   r3   r   val1val2r   )r   r   setr+   r   r>   )r#   r   r)   r&   r+   ri   s         r$   &test_custom_attribute_value_list_classz;TreeBuilderSmokeTest.test_custom_attribute_value_list_classh  s    	% 	% 	% 	% 	%); 	% 	% 	%
 &&%(#wi..$9'A ' 
 
 6yyy11f7|v%%%%7|00000#g,(BCCCCCCCr%   )rq   rr   rs   pytestmarkparametrizer   r   r   r   r   r   r   r-   r%   r$   rx   rx   5  s         [!D"ddgY.?.?.?#
|AT#U * * *
 [!DDG9$5$5$5gY7G#H 2 2 2
  I I I
& & &&D D D D Dr%   rx   c            	          e Zd ZdZd Zd Zd Zd Zdeddfd	Z		 dHdedede
eef         fdZd Zd Zd Zd Zd Zd Zd Zd Zd Zd Zd Zd Zd Zd Zd Zd Zd Zd Zd Z d  Z!d! Z"d" Z#d# Z$d$ Z%d% Z&d& Z'd' Z(d( Z)d) Z*d* Z+d+ Z,d, Z-d- Z.d. Z/d/ Z0d0 Z1d1 Z2d2 Z3d3 Z4d4 Z5d5 Z6d6 Z7d7 Z8d8 Z9d9 Z:d: Z;d; Z<d< Z=d= Z>d> Z?d? Z@d@ ZAdA ZBdB ZCdC ZDdD ZEdE ZFdF ZGdG ZHdS )IHTMLTreeBuilderSmokeTestaC  A basic test of a treebuilder's competence.

    Any HTML treebuilder, present or future, should be able to pass
    these tests. With invalid markup, there's room for interpretation,
    and different parsers can handle it differently. But with the
    markup in these tests, there's not much room for interpretation.
    c                     dD ]7}|                      d          }|                    |          }|j        du sJ 8|                     dd           |                     dd           dS )zmVerify that all HTML4 and HTML5 empty element (aka void element) tags
        are handled correctly.
        )areabasebrcolembedhrimginputkeygenlinkmenuitemmetaparamsourcetrackwbrspacerframe Tz<br/><br/><br/>z<br /><br /><br />N)r+   new_tagis_empty_elementrH   )r#   r:   r+   r   s       r$   test_empty_element_tagsz0HTMLTreeBuilderSmokeTest.test_empty_element_tags  s    
 	4 	4D( 99R==Dll4((G+t33333*,=>>>-/@AAAAAr%   c                    |                      d          }t          |j        j        t                    sJ t          |j        j        t                    sJ |                      d          }t          |j        j        t                    sJ |j        j        dk    sJ t          |j        j        t                    sJ d S )Nz7<style>Some CSS</style><script>Some Javascript</script>z<style><!--Some CSS--></style>z<!--Some CSS-->)r+   r>   stylerg   r   scriptr   r#   r+   s     r$   test_special_string_containersz7HTMLTreeBuilderSmokeTest.test_special_string_containers  s    yyRSS$*+Z88888$+,f55555yy9::$*+Z88888 z $55555$*+Z8888888r%   c                    |                      d          }t          j        |t          j                  }t          j        |          }|j        t          k    sJ |                                |                                k    sJ d S )N<a><b>foo</a>)r+   pickledumpsHIGHEST_PROTOCOLloadsr   r   r@   r#   treedumpedloadeds       r$   !test_pickle_and_unpickle_identityz:HTMLTreeBuilderSmokeTest.test_pickle_and_unpickle_identity  ss     yy))dF$;<<f%%=0000}}$++--//////r%   c                     d}|                      |          }t          j        |t          j                  }t          j        |          }|j        j        du sJ d S )Nz
<!DOCTYPE html>
<html lang="en">
<head><title>blabla</title></head>
<body><?xml encoding="utf-8" ?><html></html></body>
</html>
F)r+   r   r   r   r   r)   is_xml)r#   r&   r+   pickleds       r$   #test_pickle_and_unpickle_bad_markupz<HTMLTreeBuilderSmokeTest.test_pickle_and_unpickle_bad_markup  s[     yy  ,tV%<==|G$$|"e++++++r%   doctype_fragmentr   Nc                 &   |                      |          \  }}|j        d         }|j        t          k    sJ ||k    sJ |                    d          dt          |                   |k    sJ |j        J |j        j        d         dk    sJ dS )z8Assert that a given doctype string is handled correctly.r   utf8Nfoo_document_with_doctyperZ   r   r	   encoderY   pr#   r   doctype_strr+   doctypes        r$   assertDoctypeHandledz-HTMLTreeBuilderSmokeTest.assertDoctypeHandled  s     778HIIT -" G++++*****{{6""#5S%5%5#56+EEEE v!!!vq!U******r%   DOCTYPEdoctype_stringc                 v    d|d|d}|dz   }|                      |          }|                    d          |fS )z5Generate and parse a document with the given doctype.z<! >z
<p>foo</p>r   r+   r   )r#   r   r   r   r&   r+   s         r$   r   z/HTMLTreeBuilderSmokeTest._document_with_doctype  sM      !/0@0@0@A>)yy  ~~f%%t++r%   c                 Z    |                      d           |                      d           dS )z?Make sure normal, everyday HTML doctypes are handled correctly.r   z4html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN"Nr   r"   s    r$   test_normal_doctypesz-HTMLTreeBuilderSmokeTest.test_normal_doctypes  s;    !!&)))!!B	
 	
 	
 	
 	
r%   c                 ~    |                      d          }|j        d         }d|                                k    sJ d S )Nz
<!DOCTYPE>r   r   )r+   rZ   strip)r#   r+   r   s      r$   test_empty_doctypez+HTMLTreeBuilderSmokeTest.test_empty_doctype  s>    yy&&-"W]]__$$$$$$r%   c                     dD ]}|                      d|          \  }}|j        d         }|j        t          k    sJ |dk    sJ |                    d          d t          |                   dk    sJ |j        j        d         dk    sJ d S )N)r   DocTyper   r   r   s   <!DOCTYPE html>r   r   r   s        r$   test_mixed_case_doctypez0HTMLTreeBuilderSmokeTest.test_mixed_case_doctype  s     6 	/ 	/ $ ; ;FDT U UK mA&G$////f$$$$;;v&&'9[)9)9'9:>PPPPP 6?1%.....	/ 	/r%   c                 4    d}|                      |           d S )Nznhtml PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd"r   )r#   r   s     r$   test_public_doctype_with_urlz5HTMLTreeBuilderSmokeTest.test_public_doctype_with_url  s$     C!!'*****r%   c                 0    |                      d           d S )Nz$foo SYSTEM "http://www.example.com/"r   r"   s    r$   test_system_doctypez,HTMLTreeBuilderSmokeTest.test_system_doctype  s    !!"HIIIIIr%   c                 0    |                      d           d S )Nz#xsl:stylesheet SYSTEM "htmlent.dtd"r   r"   s    r$   test_namespaced_system_doctypez7HTMLTreeBuilderSmokeTest.test_namespaced_system_doctype      !!"GHHHHHr%   c                 0    |                      d           d S )Nz#xsl:stylesheet PUBLIC "htmlent.dtd"r   r"   s    r$   test_namespaced_public_doctypez7HTMLTreeBuilderSmokeTest.test_namespaced_public_doctype  r  r%   c                 $   d}t          j        d          5 }|                     |          }ddd           n# 1 swxY w Y   |                    d                              dd          |                    dd          k    sJ |g k    sJ dS )zJA real XHTML document should come out more or less the same as it went in.   <?xml version="1.0" encoding="utf-8"?>
<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN">
<html xmlns="http://www.w3.org/1999/xhtml">
<head><title>Hello.</title></head>
<body>Goodbye.</body>
</html>TrecordNutf-8   
r%   )warningscatch_warningsr+   r   replacer#   r&   wr+   s       r$   test_real_xhtml_documentz1HTMLTreeBuilderSmokeTest.test_real_xhtml_document  s     $D111 	%Q99V$$D	% 	% 	% 	% 	% 	% 	% 	% 	% 	% 	% 	% 	% 	% 	%{{7##++E3776>>%QT;U;UUUUU Bwwwwww   :>>c                     d}t          j        d          5 }|                     |          }d d d            n# 1 swxY w Y   dt          |                    d                    k    sJ g |k    sJ d S )Ns.   <ns1:foo>content</ns1:foo><ns1:foo/><ns2:foo/>Tr     zns1:foo)r  r  r+   rY   find_allr  s       r$   test_namespaced_htmlz-HTMLTreeBuilderSmokeTest.test_namespaced_html  s     G$D111 	%Q99V$$D	% 	% 	% 	% 	% 	% 	% 	% 	% 	% 	% 	% 	% 	% 	% Ci00111111 Qwwwwwwr  c                 <   d}t          j        d          5 }|                     |          }|j        j        dk    sJ 	 d d d            n# 1 swxY w Y   |\  }t          |j        t                    sJ t          |j                  t          j	        k    sJ d S )Ns7   <?xml version="1.0" encoding="utf-8"?><tag>string</tag>Tr  rg   )
r  r  r+   ri   rg   r>   messager   r?   MESSAGE)r#   r&   r  r+   warnings        r$   test_detect_xml_parsed_as_htmlz7HTMLTreeBuilderSmokeTest.test_detect_xml_parsed_as_html$  s     P$D111 	/Q99V$$D8?h.....	/ 	/ 	/ 	/ 	/ 	/ 	/ 	/ 	/ 	/ 	/ 	/ 	/ 	/ 	/ 	'/+ABBBBB7?##'='EEEEEEEs   (AAAc                     d}|                      |          }||                                k    sJ d}|                      |          }||                    d          k    sJ d S )Nz<?PITarget PIContent?>s   <?PITarget PIContent?>r   )r+   r@   r   r   s      r$   test_processing_instructionz4HTMLTreeBuilderSmokeTest.test_processing_instruction4  sl    
 .yy  &&&&.yy  V,,,,,,,,r%   c                 8    t          j        | j                   dS )zMake sure you can copy the tree builder.

        This is important because the builder is part of a
        BeautifulSoup object, and we want to be able to copy that.
        N)copydeepcopyr   r"   s    r$   test_deepcopyz&HTMLTreeBuilderSmokeTest.test_deepcopyA  s     	d*+++++r%   c                     |                      d          }|j        j        rJ t          |j                  dk    sJ dS )zA <p> tag is never designated as an empty-element tag.

        Even if the markup shows it as an empty-element tag, it
        shouldn't be presented that way.
        <p/><p></p>N)r+   r   r   r?   r   s     r$   !test_p_tag_is_never_empty_elementz:HTMLTreeBuilderSmokeTest.test_p_tag_is_never_empty_elementI  sE     yy  6****46{{i''''''r%   c                     |                      dd           |                      dd           |                      dd           dS )zA tag that's not closed by the end of the document should be closed.

        This applies to all tags except empty-element tags.
        <p>r(  z<b>z<b></b>z<br><br/>NrH   r"   s    r$   test_unclosed_tags_get_closedz6HTMLTreeBuilderSmokeTest.test_unclosed_tags_get_closedS  sL    
 		***	***)))))r%   c                     |                      d          }|j        j        sJ t          |j                  dk    sJ dS )zA <br> tag is designated as an empty-element tag.

        Some parsers treat <br></br> as one <br/> tag, some parsers as
        two tags, but it should always be an empty-element tag.
        z	<br></br>r,  N)r+   r   r   r?   r   s     r$   #test_br_is_always_empty_element_tagz<HTMLTreeBuilderSmokeTest.test_br_is_always_empty_element_tag]  sE     yy%%w''''47||w&&&&&&r%   c                 0    |                      d           d S )Nz<em><em></em></em>r-  r"   s    r$   test_nested_formatting_elementsz8HTMLTreeBuilderSmokeTest.test_nested_formatting_elementsg  s    -.....r%   c                 v    d}|                      |          }d|                    d          d         k    sJ d S )Nz<!DOCTYPE html>
<html>
<head>
<title>Ordinary HEAD element test</title>
</head>
<script type="text/javascript">
alert("Help!");
</script>
<body>
Hello, world!
</body>
</html>
ztext/javascriptr   type)r+   find)r#   r   r+   s      r$   test_double_headz)HTMLTreeBuilderSmokeTest.test_double_headj  sB     yy DIIh$7$7$???????r%   c                 :   d}|                      |           |                     |          }|                    d          }|j        t          k    sJ |                    d          }||j        k    sJ |                    d          }||j        k    sJ d S )Nz<p>foo<!--foobar-->baz</p>foobarrf   r   baz)rH   r+   r5  r   r   rL   rM   )r#   r&   r+   commentr   r9  s         r$   test_commentz%HTMLTreeBuilderSmokeTest.test_comment{  s    -   yy  ))8),, G++++ iiui%%#*****iiui%%#.......r%   c                    d}d}|                      |           |                      |           |                     |          }|j                                        |k    sJ |                     |          }|j                                        |k    sJ |                     d          }|j                                        dk    sJ dS )zWhitespace must be preserved in <pre> and <textarea> tags,
        even if that would mean not prettifying the markup.
        z<pre>a   z</pre>
z <textarea> woo
woo  </textarea>
z<textarea></textarea>z<textarea></textarea>
N)rH   r+   preprettifytextarea)r#   
pre_markuptextarea_markupr+   s       r$   -test_preserved_whitespace_in_pre_and_textareazFHTMLTreeBuilderSmokeTest.test_preserved_whitespace_in_pre_and_textarea  s     *
>$$$)))yy$$x  ""j0000yy))}%%''?::::yy011}%%''+DDDDDDDr%   c                     d}|                      |           d}|                      |           d}|                      |           dS )z+Inline elements can be nested indefinitely.z<b>Inside a B tag</b>z!<p>A <i>nested <b>tag</b></i></p>z/<p>A <a>doubly <i>nested <b>tag</b></i></a></p>Nr-  )r#   b_tagnested_b_tagdouble_nested_b_tags       r$   test_nested_inline_elementsz4HTMLTreeBuilderSmokeTest.test_nested_inline_elements  sU    ':&&&O,-----r%   c                     |                      d          }|j        }|j        j        j        dk    sJ |j        j        dk    sJ dS )zBlock elements can be nested.z*<blockquote><p><b>Foo</b></p></blockquote>FooN)r+   
blockquoter   r{   rg   )r#   r+   rJ  s      r$    test_nested_block_level_elementsz9HTMLTreeBuilderSmokeTest.test_nested_block_level_elements  sO    yyEFF_
|~$----|"e++++++r%   c                 `    d}|                      |d           |                      d           dS )z$One table can go inside another one.z[<table id="1"><tr><td>Here's another table:<table id="2"><tr><td>foo</td></tr></table></td>zh<table id="1"><tr><td>Here's another table:<table id="2"><tr><td>foo</td></tr></table></td></tr></table>z{<table><thead><tr><td>Foo</td></tr></thead><tbody><tr><td>Bar</td></tr></tbody><tfoot><tr><td>Baz</td></tr></tfoot></table>Nr-  r   s     r$   test_correctly_nested_tablesz5HTMLTreeBuilderSmokeTest.test_correctly_nested_tables  sW     	 	!	
 	
 	
 	;	
 	
 	
 	
 	
r%   c                     d}|                      |          }ddg|j        d         k    sJ |j        |                    dd          k    sJ d S )Nz<div class=" foo bar	 "></a>r   barrz   divzfoo bar)class_)r+   rP  r5  r   s      r$   *test_multivalued_attribute_with_whitespacezCHTMLTreeBuilderSmokeTest.test_multivalued_attribute_with_whitespace  sa     0yy  u~'!22222 x499U99========r%   c                 f    d}|                      |          }dg|j        j        d         k    sJ d S )Nz1<table><div><div class="css"></div></div></table>cssrz   )r+   rP  r   s      r$   (test_deeply_nested_multivalued_attributezAHTMLTreeBuilderSmokeTest.test_deeply_nested_multivalued_attribute  s=     Eyy  w$(,w///////r%   c                 ^    d}|                      |          }ddg|j        d         k    sJ d S )Nz<html class="a b"></html>r   r{   rz   )r+   r   r   s      r$   "test_multivalued_attribute_on_htmlz;HTMLTreeBuilderSmokeTest.test_multivalued_attribute_on_html  s<     -yy  SzTYw///////r%   c                 2    |                      dd           d S )Nz<a b="<a>"></a>z<a b="&lt;a&gt;"></a>r-  r"   s    r$   3test_angle_brackets_in_attribute_values_are_escapedzLHTMLTreeBuilderSmokeTest.test_angle_brackets_in_attribute_values_are_escaped  s     *,CDDDDDr%   c                 2    |                      dd           d S )Nz$<p>&bull; AT&T is in the s&p 500</p>u)   <p>• AT&amp;T is in the s&amp;p 500</p>r-  r"   s    r$   3test_strings_resembling_character_entity_referenceszLHTMLTreeBuilderSmokeTest.test_strings_resembling_character_entity_references  s,     	2:	
 	
 	
 	
 	
r%   c                 2    |                      dd           d S )Nz<p>Bob&apos;s Bar</p>z<p>Bob's Bar</p>r-  r"   s    r$   test_apos_entityz)HTMLTreeBuilderSmokeTest.test_apos_entity  s*    #	
 	
 	
 	
 	
r%   c                 X    d}|                      |          }d|j        j        k    sJ d S )Nz%<p>&#147;Hello&#148; &#45;&#9731;</p>u   “Hello” -☃r+   r   rg   r   s      r$   *test_entities_in_foreign_document_encodingzCHTMLTreeBuilderSmokeTest.test_entities_in_foreign_document_encoding  s5     9yy  !TV]222222r%   c                     d}|                      d|           |                      d|           |                      d|           |                      d|           d S )Nu   <p id="piñata"></p>z<p id="pi&#241;ata"></p>z<p id="pi&#xf1;ata"></p>z<p id="pi&#Xf1;ata"></p>z<p id="pi&ntilde;ata"></p>r-  r#   r   s     r$   0test_entities_in_attributes_converted_to_unicodezIHTMLTreeBuilderSmokeTest.test_entities_in_attributes_converted_to_unicode  si    H3V<<<3V<<<3V<<<5v>>>>>r%   c                     d}|                      d|           |                      d|           |                      d|           |                      d|           d S )Nu   <p>piñata</p>z<p>pi&#241;ata</p>z<p>pi&#xf1;ata</p>z<p>pi&#Xf1;ata</p>z<p>pi&ntilde;ata</p>r-  rb  s     r$   *test_entities_in_text_converted_to_unicodezCHTMLTreeBuilderSmokeTest.test_entities_in_text_converted_to_unicode  si    B-v666-v666-v666/88888r%   c                 2    |                      dd           d S )Nz#<p>I said &quot;good day!&quot;</p>z<p>I said "good day!"</p>r-  r"   s    r$   ,test_quot_entity_converted_to_quotation_markzEHTMLTreeBuilderSmokeTest.test_quot_entity_converted_to_quotation_mark  s*    13N	
 	
 	
 	
 	
r%   c                     d}|                      d|           |                      d|           |                      d|           d S )Nu   �z&#10000000000000;z&#x10000000000000;z&#1000000000;r-  rb  s     r$   test_out_of_range_entityz1HTMLTreeBuilderSmokeTest.test_out_of_range_entity  sQ    ,,f555-v666&11111r%   c                     |                      d          }d|j        j        j        j        k    sJ d|j        j        k    sJ |                     |           dS )zDMostly to prevent a recurrence of a bug in the html5lib treebuilder.z!<html><h2>
foo</h2><p></p></html>r   N)r+   h2rg   rL   r:   r   rP   r   s     r$   test_multipart_stringsz/HTMLTreeBuilderSmokeTest.test_multipart_strings  s^    yy=>>dgn166666dfk!!!!  &&&&&r%   c                     d}|                      |          }d|j                                        k    sJ d}|                      |          }d|j                                        k    sJ d S )Nz<p>a &nosuchentity b</p>z<p>a &amp;nosuchentity b</p>z<p>a &nosuchentity; b</p>z<p>a &amp;nosuchentity; b</p>)r+   r   r@   r   s      r$   test_invalid_html_entityz1HTMLTreeBuilderSmokeTest.test_invalid_html_entity  sl    
 ,yy  -@@@@,yy  .$&--//AAAAAAr%   c                 z    d}|                      |          }|j        j        J |                     |           dS )8Prevent recurrence of a bug in the html5lib treebuilder.z?<html><head></head>
  <link></link>
  <body>foo</body>
</html>
N)r+   r   bodyrP   r#   contentr+   s      r$   #test_head_tag_between_head_and_bodyz<HTMLTreeBuilderSmokeTest.test_head_tag_between_head_and_body,  sF    
 yy!!y~)))  &&&&&r%   c                 h    d}|                      |          }|                     |j                   dS )rp  z<!DOCTYPE html>
<html>
 <body>
   <article id="a" >
   <div><a href="1"></div>
   <footer>
     <a href="2"></a>
   </footer>
  </article>
  </body>
</html>
N)r+   rP   articlerr  s      r$   test_multiple_copies_of_a_tagz6HTMLTreeBuilderSmokeTest.test_multiple_copies_of_a_tag7  s8     yy!!  .....r%   c                     d}|                      |          }||                                k    sJ d|j        d         k    sJ d|j        d         k    sJ d|j        d         k    sJ dS )	zParsers don't need to *understand* namespaces, but at the
        very least they should not choke on namespaces or lose
        data.s   <html xmlns="http://www.w3.org/1999/xhtml" xmlns:mathml="http://www.w3.org/1998/Math/MathML" xmlns:svg="http://www.w3.org/2000/svg"><head></head><body><mathml:msqrt>4</mathml:msqrt><b svg:fill="red"></b></body></html>http://www.w3.org/1999/xhtmlxmlns"http://www.w3.org/1998/Math/MathMLzxmlns:mathmlhttp://www.w3.org/2000/svgz	xmlns:svgN)r+   r   r   r   s      r$   test_basic_namespacesz.HTMLTreeBuilderSmokeTest.test_basic_namespacesH  s    
 nyy  &&&&-71CCCCC3ty7PPPPP+ty/EEEEEEEr%   c                 ^    d}|                      |          }ddg|j        d         k    sJ d S )Ns   <a class="foo bar">r   rO  rz   r   r   s      r$   -test_multivalued_attribute_value_becomes_listzFHTMLTreeBuilderSmokeTest.test_multivalued_attribute_value_becomes_listT  s9    'yy  u~000000r%   c                 X    d}|                      |          }d|j        j        k    sJ d S )NuD   <html><head><meta encoding="euc-jp"></head><body>Sacré bleu!</body>   Sacré bleu!)r+   rq  rg   r   s      r$   test_can_parse_unicode_documentz8HTMLTreeBuilderSmokeTest.test_can_parse_unicode_document`  s7     yyy  49#3333333r%   c                     t          d          }|                     d|          }|                                dk    sJ dS )z2Parsers should be able to work with SoupStrainers.r{   z&A <b>bold</b> <meta/> <i>statement</i>r   z<b>bold</b>N)r   r+   r@   )r#   strainerr+   s      r$   test_soupstrainerz*HTMLTreeBuilderSmokeTest.test_soupstrainerh  sD    $$yyAhyWW{{}}------r%   c                 2    |                      dd           d S )Nz<foo attr='bar'></foo>z<foo attr="bar"></foo>r-  r"   s    r$   7test_single_quote_attribute_values_become_double_quoteszPHTMLTreeBuilderSmokeTest.test_single_quote_attribute_values_become_double_quotesn  s     13KLLLLLr%   c                 4    d}|                      |           d S )N'<foo attr='bar "brawls" happen'>a</foo>r-  )r#   texts     r$   7test_attribute_values_with_nested_quotes_are_left_alonezPHTMLTreeBuilderSmokeTest.test_attribute_values_with_nested_quotes_are_left_aloneq  s!    <r%   c                     d}|                      |          }d|j        d<   |                     |j                                        d           d S )Nr  zBrawls happen at "Bob's Bar"attrz:<foo attr="Brawls happen at &quot;Bob's Bar&quot;">a</foo>)r+   r   rH   r@   )r#   r  r+   s      r$   :test_attribute_values_with_double_nested_quotes_get_quotedzSHTMLTreeBuilderSmokeTest.test_attribute_values_with_double_nested_quotes_get_quotedu  sV    <yy:HOOM	
 	
 	
 	
 	
r%   c                 ^    |                      dd           |                      dd           d S )Nz+<this is="really messed up & stuff"></this>z/<this is="really messed up &amp; stuff"></this>z.<a href="http://example.org?a=1&b=2;3">foo</a>z2<a href="http://example.org?a=1&amp;b=2;3">foo</a>r-  r"   s    r$   .test_ampersand_in_attribute_value_gets_escapedzGHTMLTreeBuilderSmokeTest.test_ampersand_in_attribute_value_gets_escaped~  sJ    9=	
 	
 	

 	<@	
 	
 	
 	
 	
r%   c                 0    |                      d           d S )Nz/<a href="http://example.org?a=1&amp;b=2;3"></a>r-  r"   s    r$   7test_escaped_ampersand_in_attribute_value_is_left_alonezPHTMLTreeBuilderSmokeTest.test_escaped_ampersand_in_attribute_value_is_left_alone  s    JKKKKKr%   c                 :    d}d}|                      ||           d S )N-<p>&lt;&lt;sacr&eacute;&#32;bleu!&gt;&gt;</p>#   <p>&lt;&lt;sacré bleu!&gt;&gt;</p>r-  )r#   r  expecteds      r$   1test_entities_in_strings_converted_during_parsingzJHTMLTreeBuilderSmokeTest.test_entities_in_strings_converted_during_parsing  s/     ?R 	 	x(((((r%   c                 \    d}|                      |d          }|j        j        dk    sJ d S )Ns   <p>Foo</p>zwindows-1252from_encodingu	   ‘Foo’r_  )r#   quoter+   s      r$   )test_smart_quotes_converted_on_the_way_inzBHTMLTreeBuilderSmokeTest.test_smart_quotes_converted_on_the_way_in  sL     &yyny==FMQR R R R R Rr%   c                 T    |                      d          }|j        j        dk    sJ d S )Nz<a>&nbsp;&nbsp;</a>u     )r+   r   rg   r   s     r$   0test_non_breaking_spaces_converted_on_the_way_inzIHTMLTreeBuilderSmokeTest.test_non_breaking_spaces_converted_on_the_way_in  s0    yy.//v} 8888888r%   c                     d}d                     d          }|                     |          }|j                             d          |k    sJ d S )Nr  r  r  )r   r+   r   )r#   r  r  r+   s       r$   &test_entities_converted_on_the_way_outz?HTMLTreeBuilderSmokeTest.test_entities_converted_on_the_way_out  sU    >Y``
 
 yyv}}W%%111111r%   c                     d}|                     d          }|                     |          }|                     d          }|                    dd          }|                     d          }||k    sJ d S )Nu   <html><head><meta content="text/html; charset=ISO-8859-1" http-equiv="Content-type"/></head><body><p>Sacré bleu!</p></body></html>z
iso-8859-1r  z
ISO-8859-1)r   r+   r  )r#   unicode_htmliso_latin_htmlr+   resultr  s         r$   test_real_iso_8859_documentz4HTMLTreeBuilderSmokeTest.test_real_iso_8859_document  s    
 ~ &,,\:: yy(( W%%
  ''g>> ??7++ !!!!!!r%   c                    d}|                     d          }|                     |          }|                    d          |                    d          k    sJ |                    d          |                    d          k    sJ d S )Nsk   <html><head></head><body><pre>Shift-JISŃR[fBOꂽ{̃t@CłB</pre></body></html>	shift-jisr  euc_jp)r@   r+   r   )r#   shift_jis_htmlr  r+   s       r$   test_real_shift_jis_documentz5HTMLTreeBuilderSmokeTest.test_real_shift_jis_document  s    $ 	 &,,[99yy&& {{7##|':':7'C'CCCCC{{8$$(;(;H(E(EEEEEEEr%   c                     d}|                      |d          }|j        dv sJ |                    d          |                    d                              d          k    sJ d S )Ns   <html><head><title>Hebrew (ISO 8859-8) in Visual Directionality</title></head><body><h1>Hebrew (ISO 8859-8) in Visual Directionality</h1></body></html>	iso8859-8r  )r  z
iso-8859-8r  )r+   original_encodingr   r@   )r#   hebrew_documentr+   s      r$   test_real_hebrew_documentz2HTMLTreeBuilderSmokeTest.test_real_hebrew_document  s     EyyyDD %)DDDDD{{7##"";//66w??
 
 
 
 
 
r%   c                 V   d}d|z  }|                      |          }|                    dddi          }|d         }d|k    sJ t          |t                    sJ d|                    d	          k    sJ d
|                    d	          v sJ d|                    d          v sJ d S )NzE<meta content="text/html; charset=x-sjis" http-equiv="Content-type"/>j<html><head>
%s
<meta http-equiv="Content-language" content="ja"/></head><body>Shift-JIS markup goes here.r   z
http-equivzContent-typers  ztext/html; charset=x-sjisztext/html; charset=utf8r   s   charset=utf8s   charset=shift-jisr  )r+   r5  r>   r   substitute_encodingr   )r#   meta_tagr  r+   parsed_metars  s         r$   'test_meta_tag_reflects_current_encodingz@HTMLTreeBuilderSmokeTest.test_meta_tag_reflects_current_encoding  s     W 	7 	
 yy(( ii~(FGGi(*g5555 '#<===== )G,G,G,O,OOOOO +"4"4V"<"<<<<<#{'9'9+'F'FFFFFFFr%   c                 T   d}d|z  }|                      |          }|                    dd          }|d         }d|k    sJ t          |t                    sJ d|                    d          k    sJ d	|                    d          v sJ d
|                    d          v sJ d S )Nz'<meta id="encoding" charset="x-sjis" />r  r   encodingrn   charsetzx-sjisr   s   charset="utf8"s   charset="shift-jis"r  )r+   r5  r>   r   r  r   )r#   r  r  r+   r  r  s         r$   3test_html5_style_meta_tag_reflects_current_encodingzLHTMLTreeBuilderSmokeTest.test_html5_style_meta_tag_reflects_current_encoding  s     =7 	
 yy(( ii:i66i(7"""" '#<===== 44V<<<<<< !K$6$6v$>$>>>>>%););K)H)HHHHHHHr%   c                     dD ]Z}|                      |          }t          D ];}|dv r|                    |          }d|v sJ |                    d          |vsJ <[d S )N)sB   <meta charset="utf8"></head><meta id="encoding" charset="utf-8" />idnambcsoem	undefinedstring_escapezstring-escapes   meta charset=""asciir+   r   r   r#   r&   r+   r  encodeds        r$   2test_python_specific_encodings_not_used_in_charsetzKHTMLTreeBuilderSmokeTest.test_python_specific_encodings_not_used_in_charset&  s    

 	? 	?F 99V$$D5 ? ?     ++h//)W4444w//w>>>>>?		? 	?r%   c                     |                      d          }d|j        d<   d|j                                        k    sJ d S )Nz<a>text</a>rO  r   z<a foo="bar">text</a>)r+   r   r@   )r#   datas     r$   5test_tag_with_no_attributes_can_have_attributes_addedzNHTMLTreeBuilderSmokeTest.test_tag_with_no_attributes_can_have_attributes_added@  s?    yy''u&$&--//999999r%   c                 n    |                      d          }d|j                                        k    sJ d S )Nz0<body><div><p>text1</p></span>text2</div></body>z)<body><div><p>text1</p>text2</div></body>)r+   rq  r@   r   s     r$   $test_closing_tag_with_no_opening_tagz=HTMLTreeBuilderSmokeTest.test_closing_tag_with_no_opening_tagE  s;    
 yyKLL:di>N>N>P>PPPPPPPr%   c                 d    |                      t                    }|                     |           dS z3Test the worst case (currently) for linking issues.Nr+   r   r[   r   s     r$   test_worst_casez(HTMLTreeBuilderSmokeTest.test_worst_caseM  /     yy&&t$$$$$r%   )r   )Irq   rr   rs   __doc__r   r   r   r   r?   r   r   bytesr   r   r   r   r   r  r  r  r	  r  r  r  r!  r%  r)  r.  r0  r2  r6  r;  rB  rG  rK  rM  rR  rU  rW  rY  r[  r]  r`  rc  re  rg  ri  rl  rn  rt  rw  r}  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r-   r%   r$   r   r   z  sc        B B B>
9 
9 
90 0 0, , ,+S +T + + + +  <E, , #,58,	um#	$, , , ,
 
 
% % %
/ / / + + +J J JI I II I I     	F 	F 	F - - -, , ,( ( (* * *' ' '/ / /@ @ @"/ / /E E E$	. 	. 	., , ,
 
 
0
> 
> 
>0 0 00 0 0E E E
 
 

 
 

3 
3 
3? ? ?9 9 9
 
 

2 2 2' ' 'B B B	' 	' 	'/ / /"
F 
F 
F1 1 14 4 4. . .M M M  
 
 
	
 	
 	
L L L) ) )
 
 
9 9 92 2 2" " "8F F F$

 

 

G G GFI I I<? ? ?4: : :
Q Q Q% % % % %r%   r   c                       e Zd Zd Zd Zd Zd Zd Zd Zd Z	d Z
d	 Zd
 Zd Zd Zd Zd Zd Zd Zd Zd Zd Zd Zd ZdS )XMLTreeBuilderSmokeTestc                     |                      d          }t          j        |d          }t          j        |          }|j        t
          k    sJ |                                |                                k    sJ d S )Nr   r  )r+   r   r   r   r   r   r@   r   s       r$   r   z9XMLTreeBuilderSmokeTest.test_pickle_and_unpickle_identityU  sp     yy))dA&&f%%=0000}}$++--//////r%   c                 d    |                      d          }|                                dk    sJ d S )N<root/>s.   <?xml version="1.0" encoding="utf-8"?>
<root/>r   r   s     r$   test_docstring_generatedz0XMLTreeBuilderSmokeTest.test_docstring_generated^  s3    yy##{{}} RRRRRRRr%   c                 j    d}|                      |          }||                    d          k    sJ d S )Ns,   <?xml version="1.0" encoding="utf8"?>
<foo/>r   r   r   s      r$   test_xml_declarationz,XMLTreeBuilderSmokeTest.test_xml_declarationb  s;    Eyy  V,,,,,,,,r%   c                     d}|                      |          }t          D ];}|dv r|                    |          }d|v sJ |                    d          |vsJ <d S )Ns   <?xml version="1.0"?>
<foo/>r  s   <?xml version="1.0"?>r  r  r  s        r$   :test_python_specific_encodings_not_used_in_xml_declarationzRXMLTreeBuilderSmokeTest.test_python_specific_encodings_not_used_in_xml_declarationg  s     6yy  1 	; 	;H    kk(++G+w6666??7++7:::::	; 	;r%   c                 j    d}|                      |          }||                    d          k    sJ d S )Ns<   <?xml version="1.0" encoding="utf8"?>
<?PITarget PIContent?>r   r   r   s      r$   r!  z3XMLTreeBuilderSmokeTest.test_processing_instruction~  s;    Uyy  V,,,,,,,,r%   c                 j    d}|                      |          }|                    d          |k    sJ dS )zGA real XHTML document should come out *exactly* the same as it went in.r  r  Nr   r   s      r$   r  z0XMLTreeBuilderSmokeTest.test_real_xhtml_document  s?     yy  {{7##v------r%   c                 h    d}|                      |          }||                                k    sJ d S )Ns  <?xml version="1.0" encoding="utf-8"?>
<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.1//EN" "http://www.w3.org/TR/xhtml11/DTD/xhtml11.dtd">
<parent xmlns="http://ns1/">
<child xmlns="http://ns2/" xmlns:ns3="http://ns3/">
<grandchild ns3:attr="value" xmlns="http://ns4/"/>
</child>
</parent>r   r#   docr+   s      r$   test_nested_namespacesz.XMLTreeBuilderSmokeTest.test_nested_namespaces  s9     yy~~dkkmm######r%   c                 v    d}t          |d          }d|j        _        |                                }d|v sJ d S )Nz/
  <script type="text/javascript">
  </script>
zlxml-xmlzconsole.log("< < hey > > ");s   &lt; &lt; hey &gt; &gt;)r   r   rg   r   )r#   r  r+   r  s       r$   5test_formatter_processes_script_tag_for_xml_documentszMXMLTreeBuilderSmokeTest.test_formatter_processes_script_tag_for_xml_documents  sJ     S*-- <++--)W444444r%   c                 X    d}|                      |          }d|j        j        k    sJ d S )Nu?   <?xml version="1.0" encoding="euc-jp"><root>Sacré bleu!</root>r  r+   rootrg   r   s      r$   r  z7XMLTreeBuilderSmokeTest.test_can_parse_unicode_document  s4    syy  49#3333333r%   c                 X    d}|                      |          }d|j        j        k    sJ d S )NuB   ﻿<?xml version="1.0" encoding="euc-jp"><root>Sacré bleu!</root>r  r  r   s      r$   1test_can_parse_unicode_document_begining_with_bomzIXMLTreeBuilderSmokeTest.test_can_parse_unicode_document_begining_with_bom  s7     Gyy  49#3333333r%   c                 h    d}|                      |          }t          |j                  |k    sJ d S )Nz<rss xmlns:dc="foo"><dc:creator>b</dc:creator><dc:date>2012-07-02T20:33:42Z</dc:date><dc:rights>c</dc:rights><image>d</image></rss>)r+   r?   rssr   s      r$   test_popping_namespaced_tagz3XMLTreeBuilderSmokeTest.test_popping_namespaced_tag  s:     Wyy  48}}&&&&&&r%   c                 f    |                      d          }|                    d          dk    sJ d S )Nr  latin1s/   <?xml version="1.0" encoding="latin1"?>
<root/>r   r   s     r$   (test_docstring_includes_correct_encodingz@XMLTreeBuilderSmokeTest.test_docstring_includes_correct_encoding  s7    yy##KK!!%XXXXXXXr%   c                 j    d}|                      |          }|                    d          |k    sJ dS )z<A large XML document should come out the same as it went in.s4  <?xml version="1.0" encoding="utf-8"?>
<root>0000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000</root>r  Nr   r   s      r$   test_large_xml_documentz/XMLTreeBuilderSmokeTest.test_large_xml_document  sB     	
 yy  {{7##v------r%   c                 \    |                      dd           |                      d           d S )Nr+  r'  z
<p>foo</p>r-  r"   s    r$   9test_tags_are_empty_element_if_and_only_if_they_are_emptyzQXMLTreeBuilderSmokeTest.test_tags_are_empty_element_if_and_only_if_they_are_empty  s2    '''&&&&&r%   c                 z    d}|                      |          }|j        }d|d         k    sJ d|d         k    sJ d S )Nz<root xmlns:a="http://example.com/" xmlns:b="http://example.net/"><a:foo>This tag is in the a namespace</a:foo><b:foo>This tag is in the b namespace</b:foo></root>zhttp://example.com/zxmlns:azhttp://example.net/zxmlns:b)r+   r  )r#   r&   r+   r  s       r$   test_namespaces_are_preservedz5XMLTreeBuilderSmokeTest.test_namespaces_are_preserved  sR     wyy  y$Y7777$Y777777r%   c                 h    d}|                      |          }t          |j                  |k    sJ d S )NzN<p xmlns:dc="http://purl.org/dc/elements/1.1/"><dc:date>20010504</dc:date></p>)r+   r?   r   r   s      r$   test_closing_namespaced_tagz3XMLTreeBuilderSmokeTest.test_closing_namespaced_tag  s7    ayy  46{{f$$$$$$r%   c                 h    d}|                      |          }t          |j                  |k    sJ d S )Nzs<foo xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"><bar xsi:schemaLocation="http://www.example.com"/></foo>r+   r?   r   r   s      r$   test_namespaced_attributesz2XMLTreeBuilderSmokeTest.test_namespaced_attributes  s:     Gyy  48}}&&&&&&r%   c                 h    d}|                      |          }t          |j                  |k    sJ d S )Nz<foo xml:lang="fr">bar</foo>r  r   s      r$   (test_namespaced_attributes_xml_namespacez@XMLTreeBuilderSmokeTest.test_namespaced_attributes_xml_namespace  s7    /yy  48}}&&&&&&r%   c                 &   d}|                      |          }dt          |                    d                    k    sJ dt          |                    d                    k    sJ dt          |                    d                    k    sJ 	 d S )Na  <?xml version="1.0" encoding="utf-8"?>
<Document xmlns="http://example.com/ns0"
    xmlns:ns1="http://example.com/ns1"
    xmlns:ns2="http://example.com/ns2">
    <ns1:tag>foo</ns1:tag>
    <ns1:tag>bar</ns1:tag>
    <ns2:tag key="value">baz</ns2:tag>
</Document>
   ri   r  zns1:tagrT   zns2:tag)r+   rY   r  r  s      r$   test_find_by_prefixed_namez2XMLTreeBuilderSmokeTest.test_find_by_prefixed_name  s     yy~~ Ce,,------ Ci00111111Ci00111111<<<r%   c                     d}|                      |          }|j        }t          j        |          }|j        |j        k    sJ d S )Nzf<?xml version="1.0" encoding="UTF-8" standalone="yes"?>
<w:document xmlns:w="http://example.com/ns0"/>)r+   documentr#  prefix)r#   xmlr+   ri   	duplicates        r$   !test_copy_tag_preserves_namespacez9XMLTreeBuilderSmokeTest.test_copy_tag_preserves_namespace  sL    2 yy~~mIcNN	 zY-------r%   c                 d    |                      t                    }|                     |           dS r  r  r   s     r$   r  z'XMLTreeBuilderSmokeTest.test_worst_case  r  r%   N)rq   rr   rs   r   r  r  r  r!  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r-   r%   r$   r  r  T  sR       0 0 0S S S- - -
; ; ;.- - -
	. 	. 	.	$ 	$ 	$
5 
5 
54 4 4
4 4 4
' ' '

 
 
. . .' ' '8 8 8% % %
' ' '
' ' '
= = =,	. 	. 	.% % % % %r%   r  c                   0    e Zd ZdZd Zd Zd Zd Zd ZdS )HTML5TreeBuilderSmokeTestz2Smoke test for a tree builder that supports HTML5.c                     d S r!   r-   r"   s    r$   r  z2HTML5TreeBuilderSmokeTest.test_real_xhtml_document	  s	     	r%   c                 X    d}|                      |          }d|j        j        k    sJ d S )Nz<a>ry  )r+   r   	namespacer   s      r$   test_html_tags_have_namespacez7HTML5TreeBuilderSmokeTest.test_html_tags_have_namespace  s4    yy  -1AAAAAAAr%   c                     d}|                      |          }d}||j        j        k    sJ ||j        j        k    sJ d S )Nz<svg><circle/></svg>r|  )r+   svgr
  circler#   r&   r+   r
  s       r$   test_svg_tags_have_namespacez6HTML5TreeBuilderSmokeTest.test_svg_tags_have_namespace  sO    'yy  0	DH.....DK1111111r%   c                     d}|                      |          }d}||j        j        k    sJ ||j        j        k    sJ d S )Nz<math><msqrt>5</msqrt></math>r{  )r+   mathr
  msqrtr  s       r$   test_mathml_tags_have_namespacez9HTML5TreeBuilderSmokeTest.test_mathml_tags_have_namespace  sO    0yy  8	DI/////DJ0000000r%   c                     d}|                      |          }t          |j        d         t                    sJ |j        d         dk    sJ d|j        d         j        j        k    sJ d S )Nz3<?xml version="1.0" encoding="utf-8"?><html></html>r   z$?xml version="1.0" encoding="utf-8"?r   )r+   r>   rZ   r   rL   r:   r   s      r$   $test_xml_declaration_becomes_commentz>HTML5TreeBuilderSmokeTest.test_xml_declaration_becomes_comment!  sr    Fyy  $-*G44444}Q#IIIIIq)6;;;;;;;r%   N)	rq   rr   rs   r  r  r  r  r  r  r-   r%   r$   r  r    sh        <<  
B B B
2 2 21 1 1< < < < <r%   r  )9r  __license__r   	importlibr#  r  r   bs4r   bs4.elementr   r   r   r   r	   r
   r   r   r   r   
bs4.filterr   bs4.builderr   bs4._typingr   r   bs4.builder._htmlparserr   typingr   r   r   r   r   	soupsiever   SOUP_SIEVE_PRESENTImportErrorutil	find_specHTML5LIB_PRESENT
lxml.etreelxmlLXML_PRESENTetreeLXML_VERSIONr   __annotations__r   r?   objectr   rx   r   r  r  r-   r%   r$   <module>r-     si                                            $ # # # # #      ( ' ' ' ' ' # # # # # # 9 9 9 9 9 9             ------    >++J77tC L:*LL   LLLL &;k" : : :"c " " "JM; M; M; M; M;v M; M; M;`BD BD BD BD BD8 BD BD BDJW% W% W% W% W%3 W% W% W%to% o% o% o% o%2 o% o% o%d <  <  <  <  < 8  <  <  <  <  <s$   %A. .A87A8B+ +	B76B7