
    MhE                         d dl Z d dlZd dlZd dlZd dlmZ d dlmZmZmZ  G d de	          Z
 G d de	          Z G d d	e	          ZdS )
    N)BeautifulSoup)EntitySubstitutionEncodingDetectorUnicodeDammitc                       e Zd ZdZd Zej                            dg d          d             Zd Z	d Z
d Zd	 Zd
 Zd ZdS )TestUnicodeDammitz"Standalone tests of UnicodeDammit.c                 B    d}t          |          }|j        |k    sJ d S )Nu   I'm already Unicode! ☃)r   unicode_markup)selfmarkupdammits      U/var/www/html/test/jupyter/venv/lib/python3.11/site-packages/bs4/tests/test_dammit.pytest_unicode_inputz$TestUnicodeDammit.test_unicode_input   s.    3v&&$......    z smart_quotes_to,expect_converted))Nu   ‘’“”)xmlz &#x2018;&#x2019;&#x201C;&#x201D;)htmlz&lsquo;&rsquo;&ldquo;&rdquo;)asciiz''""c                 p    d}t          |dg|          j        }|d                    |          k    sJ dS )zbVerify the functionality of the smart_quotes_to argument
        to the UnicodeDammit constructor.s   <foo></foo>windows-1252)known_definite_encodingssmart_quotes_toz<foo>{}</foo>N)r   r
   format)r   r   expect_convertedr   	converteds        r   test_smart_quotes_toz&TestUnicodeDammit.test_smart_quotes_to   sY     0!&4%5+
 
 
 	 	
 O223CDDDDDDDDr   c                     d}t          |          }|j                                        dk    sJ |j        dk    sJ d S )Ns   Sacré bleu! ☃utf-8u   Sacré bleu! ☃r   original_encodinglowerr
   )r   utf8r   s      r   test_detect_utf8z"TestUnicodeDammit.test_detect_utf8*   sO    1t$$'--//7::::$(DDDDDDDr   c                     d}t          |dg          }|j                                        dk    sJ |j        dk    sJ d S )N   
iso-8859-8u   םולשr   )r   hebrewr   s      r   test_convert_hebrewz%TestUnicodeDammit.test_convert_hebrew0   sS    $v~66'--//<????$(BBBBBBBr   c                     d}t          |          }|j                                        dk    sJ |j                            d          |k    sJ d S )Ns   ケータイ Watchr   )r   r   r    r
   encode)r   utf_8r   s      r   /test_dont_see_smart_quotes_where_there_are_nonezATestUnicodeDammit.test_dont_see_smart_quotes_where_there_are_none6   s\    Iu%%'--//7::::$++G44======r   c                     d                     d          }t          |dg          }|j                                        dk    sJ d S )N   Räksmörgåsr   r%   r)   r   r   r    r   	utf8_datar   s      r    test_ignore_inappropriate_codecsz2TestUnicodeDammit.test_ignore_inappropriate_codecs<   sL    #**733	y<.99'--//7::::::r   c                     d                     d          }dD ]2}t          ||g          }|j                                        dk    sJ 3d S )Nr-   r   )z.utf8z...z
utF---16.!r.   )r   r0   bad_encodingr   s       r   test_ignore_invalid_codecsz,TestUnicodeDammit.test_ignore_invalid_codecsA   sb    #**733	: 	? 	?L"9|n==F+1133w>>>>>	? 	?r   c                     d                     d          }t          |dg          }|j                                        dk    sJ t          |ddg          }|j        J d S )Nr-   r   )exclude_encodingsr   r.   r/   s      r   test_exclude_encodingsz(TestUnicodeDammit.test_exclude_encodingsG   sy    #**733	 yWIFFF'--//>AAAA yWn<UVVV'/////r   N)__name__
__module____qualname____doc__r   pytestmarkparametrizer   r"   r'   r+   r1   r4   r7    r   r   r   r      s        ,,/ / /
 [*	
 	
 	
 	E 	E 	EE E EC C C> > >; ; ;
? ? ?0 0 0 0 0r   r   c                   D    e Zd Zd Zd Zd Zd Zd Zd Zd Z	d Z
d	 Zd
S )TestEncodingDetectorc                 X    t          d          }t          |j                  }d|v sJ d S )Ns'   <?xml version="1.0" encoding="UTF-" ?>u   utf-�)r   list	encodings)r   detectedrD   s      r   Ptest_encoding_detector_replaces_junk_in_encoding_name_with_replacement_characterzeTestEncodingDetector.test_encoding_detector_replaces_junk_in_encoding_name_with_replacement_characterV   s9     $$QRR+,,	.);;;;;;r   c                 L    dD ] }t          |d          }d|j        k    sJ !d S )N)s&   <html><meta charset="euc-jp" /></html>s&   <html><meta charset='euc-jp' /></html>s$   <html><meta charset=euc-jp /></html>s#   <html><meta charset=euc-jp/></html>Tis_htmlzeuc-jp)r   r   r   datar   s      r    test_detect_html5_style_meta_tagz5TestEncodingDetector.test_detect_html5_style_meta_tag]   sH    
 	8 	8D #4666Fv777777	8 	8r   c                    d}t           j        j        }t          j        t          j                   	 d }|t           j        _        t          |          }d|j        u sJ d|j        v sJ t          |d          }|j        sJ 	 t          j        t          j
                   |t           j        _        d S # t          j        t          j
                   |t           j        _        w xY w)NsT   ﻿<?xml version="1.0" encoding="UTF-8"?>
<html><b>بتر</b>
<i>ѐ</i></html>c                     d S Nr?   )strs    r   noopzETestEncodingDetector.test_last_ditch_entity_replacement.<locals>.noop~   s    tr   Tu   �zhtml.parser)bs4r   _chardet_dammitloggingdisableWARNINGr   contains_replacement_charactersr
   r   NOTSET)r   docchardetrQ   r   soups         r   "test_last_ditch_entity_replacementz7TestEncodingDetector.test_last_ditch_entity_replacementg   s     2 *,(((	1   *.CJ&"3''F6AAAAAv44444 m44D77777OGN+++)0CJ&&& OGN+++)0CJ&0000s   AB7 71C(c                 \    d}t          |          }d|j        k    sJ d|j        k    sJ d S )N   < a >   < / a > u   <a>áé</a>utf-16le)r   r
   r   rJ   s      r   test_byte_order_mark_removedz1TestEncodingDetector.test_byte_order_mark_removed   sB    Mt$$ 55555V5555555r   c                 L   d}t          |          }t          |dg          }d|j        k    sJ t          |dg          }d|j        k    sJ dgd |j        D             k    sJ d}t          |dgd	g
          }d	|j        k    sJ dd	gd |j        D             k    sJ d S )Nr^   zutf-16)r   r   )user_encodingsr_   c                     g | ]
}|d          S r   r?   .0xs     r   
<listcomp>zRTestEncodingDetector.test_known_definite_versus_user_encodings.<locals>.<listcomp>   s    EEE!EEEr   r$   r%   )r   rb   c                     g | ]
}|d          S rd   r?   re   s     r   rh   zRTestEncodingDetector.test_known_definite_versus_user_encodings.<locals>.<listcomp>   s    *P*P*PA1Q4*P*P*Pr   )r   r   tried_encodings)r   rK   r   beforeafterr&   s         r   )test_known_definite_versus_user_encodingsz>TestEncodingDetector.test_known_definite_versus_user_encodings   s    
 Nt$$ txjIII633333
 dG9===U44444|EEf.DEEEEEEE %gY~
 
 
 v77777&*P*P9O*P*P*PPPPPPPr   c                 D   d}t          j        d          5 }t          |dgdgdg          }d d d            n# 1 swxY w Y   |\  }|j        }t	          |t
                    sJ |j        t          k    sJ d|j        k    sJ g dd	 |j	        D             k    sJ d S )
Nr$   T)record	shift-jisr   r%   )r   override_encodingsrb   )rp   r   r%   c                     g | ]
}|d          S rd   r?   re   s     r   rh   zKTestEncodingDetector.test_deprecated_override_encodings.<locals>.<listcomp>   s    222aQqT222r   )
warningscatch_warningsr   message
isinstanceDeprecationWarningfilename__file__r   rj   )r   r&   wr   warningru   s         r   "test_deprecated_override_encodingsz7TestEncodingDetector.test_deprecated_override_encodings   s!    %$D111 	Q"*5$+9 ,~	  F	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	/'#5666668++++v77777 4332261222
 
 
 
 
 
s   ;??c                 T   d                     d          }d                     d          }||z   |z   }t          j        t                    5  |                    d           d d d            n# 1 swxY w Y   t          j        |          }d|                    d          k    sJ d S )Nu	   ☃☃☃r!   u   “Hi, I like Windows!”windows_1252u+   ☃☃☃“Hi, I like Windows!”☃☃☃)r)   r<   raisesUnicodeDecodeErrordecoder   	detwingle)r   r!   r~   rY   fixeds        r   test_detwinglez#TestEncodingDetector.test_detwingle   s    !))&11.
&
 
  	 \!D( ]-.. 	 	JJv	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 ',,<V@T@TTTTTTTs   A..A25A2c                     dD ]J}|                     d          }|                    d          sJ t          j        |          }||k    sJ Kd S )N)u   œu   ₓu   ðr!      )r)   endswithr   r   )r   tricky_unicode_charinputoutputs       r   +test_detwingle_ignores_multibyte_charactersz@TestEncodingDetector.test_detwingle_ignores_multibyte_characters   sj    
$
 	# 	#
 (..v66E>>'*****",U33FU?????	# 	#r   c                    d}|                     d          }d}|                     d          }t          j        } ||d          J d ||d          k    sJ d ||d          k    sJ d ||          k    sJ d ||          k    sJ d	d
z  } |||z             J  |||z             J  |||z   dd          dk    sJ  ||d          dk    sJ  |d	|z   d          dk    sJ  |d|z   d          J d S )Nz0<html><head><meta charset="utf-8"></head></html>r   z,<?xml version="1.0" encoding="ISO-8859-1" ?>FrH   r   Tz
iso-8859-1    i  )rI   search_entire_document)r      a)r)   r   find_declared_encoding)r   html_unicode
html_bytesxml_unicode	xml_bytesmspacers          r   test_find_declared_encodingz0TestEncodingDetector.test_find_declared_encoding   s    J!((11
D&&w//	3qu---555!!L$7777777!!J5555555qq~~----qq||++++ q*$%%---q)#$$,,,
 Afz!4MMMQXXXXX q4888LHHHHq	!$???<OOOOq	!$???GGGGGr   N)r8   r9   r:   rF   rL   r\   r`   rm   r|   r   r   r   r?   r   r   rA   rA   U   s        < < <8 8 8#1 #1 #1J6 6 6Q Q Q@
 
 
.U U U0# # #'H 'H 'H 'H 'Hr   rA   c                      e Zd ZdZd Zej                            dddg          d             Zd Z	d Z
d	 Zd
 Zd Zd Zd Zd Zd Zd Zd Zd Zej                            dg d          d             Zej                            dg d          d             Zej                            ddg          d             ZdS )TestEntitySubstitutionz1Standalone tests of the EntitySubstitution class.c                     t           | _        d S rO   )r   subr   s    r   setup_methodz#TestEntitySubstitution.setup_method  s    %r   zoriginal,substituted)u   foo∀☃õbaru   foo&forall;☃&otilde;bar)u   ‘’foo“”z&lsquo;&rsquo;foo&ldquo;&rdquo;c                 F    | j                             |          |k    sJ d S rO   r   substitute_html)r   originalsubstituteds      r   test_substitute_htmlz+TestEntitySubstitution.test_substitute_html!  s+     x''11[@@@@@@r   c                 n    dD ]1\  }}d}||z  }||z  }| j                             |          |k    sJ 2d S )N)	)z&models;u   ⊧)z&Nfr;u   𝔑)z&ngeqq;u   ≧̸)z&not;   ¬)z&Not;u   ⫬z||)fjr   )z&gt;>)z&lt;<z3 %s 4r   )r   entityutemplaterawwith_entitiess         r   test_html5_entityz(TestEntitySubstitution.test_html5_entity/  sc    
 	B 	BIFA(  HQ,C$v-M8++C00MAAAAA/	B 	Br   c                     d}d}| j                             |          |k    sJ d}d}| j                             |          |k    sJ d S )Nu   fjords ⊔ penguinszfjords &sqcup; penguinsu   fjords ⊔︀ penguinszfjords &sqcups; penguinsr   )r   rK   r   s      r   )test_html5_entity_with_variation_selectorz@TestEntitySubstitution.test_html5_entity_with_variation_selectorI  s`     (*x''--7777-+x''--777777r   c                 L    d}| j                             |d          |k    sJ d S )NWelcome to "my bar"Fr   substitute_xmlr   ss     r   Itest_xml_converstion_includes_no_quotes_if_make_quoted_attribute_is_falsez`TestEntitySubstitution.test_xml_converstion_includes_no_quotes_if_make_quoted_attribute_is_falseU  s0    !x&&q%00A555555r   c                     | j                             dd          dk    sJ | j                             dd          dk    sJ d S )NWelcomeTz	"Welcome"z	Bob's Barz"Bob's Bar"r   r   s    r   6test_xml_attribute_quoting_normally_uses_double_quoteszMTestEntitySubstitution.test_xml_attribute_quoting_normally_uses_double_quotesY  sN    x&&y$77;FFFFx&&{D99^KKKKKKr   c                 L    d}| j                             |d          dk    sJ d S )Nr   Tz'Welcome to "my bar"'r   r   s     r   Otest_xml_attribute_quoting_uses_single_quotes_when_value_contains_double_quoteszfTestEntitySubstitution.test_xml_attribute_quoting_uses_single_quotes_when_value_contains_double_quotes]  s3     "x&&q$//3LLLLLLLr   c                 L    d}| j                             |d          dk    sJ d S )NWelcome to "Bob's Bar"Tz""Welcome to &quot;Bob's Bar&quot;"r   r   s     r   btest_xml_attribute_quoting_escapes_single_quotes_when_value_contains_both_single_and_double_quoteszyTestEntitySubstitution.test_xml_attribute_quoting_escapes_single_quotes_when_value_contains_both_single_and_double_quotesc  s3     &x&&q$//3XXXXXXXr   c                 J    d}| j                             |          |k    sJ d S )Nr   r   )r   quoteds     r   <test_xml_quotes_arent_escaped_when_value_is_not_being_quotedzSTestEntitySubstitution.test_xml_quotes_arent_escaped_when_value_is_not_being_quotedi  s.    *x&&v..&888888r   c                 F    | j                             d          dk    sJ d S )Nzfoo<bar>zfoo&lt;bar&gt;r   r   s    r   'test_xml_quoting_handles_angle_bracketsz>TestEntitySubstitution.test_xml_quoting_handles_angle_bracketsm  s*    x&&z226FFFFFFFr   c                 F    | j                             d          dk    sJ d S )NzAT&TzAT&amp;Tr   r   s    r   #test_xml_quoting_handles_ampersandsz:TestEntitySubstitution.test_xml_quoting_handles_ampersandsp  s)    x&&v..*<<<<<<r   c                 F    | j                             d          dk    sJ d S )N&Aacute;T&Tz&amp;Aacute;T&amp;Tr   r   s    r   Etest_xml_quoting_including_ampersands_when_they_are_part_of_an_entityz\TestEntitySubstitution.test_xml_quoting_including_ampersands_when_they_are_part_of_an_entitys  s*    x&&}559NNNNNNNr   c                 F    | j                             d          dk    sJ d S )Nr   z&Aacute;T&amp;T)r   "substitute_xml_containing_entitiesr   s    r   Dtest_xml_quoting_ignoring_ampersands_when_they_are_part_of_an_entityz[TestEntitySubstitution.test_xml_quoting_ignoring_ampersands_when_they_are_part_of_an_entityv  s6    H77FF ! ! ! ! ! !r   c                 J    d}| j                             |          |k    sJ dS )z:There's no need to do this except inside attribute values.zBob's "bar"Nr   )r   texts     r    test_quotes_not_html_substitutedz7TestEntitySubstitution.test_quotes_not_html_substituted|  s.    x''--555555r   zmarkup, old))z	foo & barzfoo &amp; bar)zfoo&zfoo&amp;)z
foo&&& barzfoo&amp;&amp;&amp; bar)zx=1&y=2zx=1&amp;y=2)z&123z&amp;123)z&abcz&amp;abc)z
foo &0 barzfoo &amp;0 bar)zfoo &lolwat barzfoo &amp;lolwat barc                     | j                             |          |k    sJ | j                             |          |k    sJ d S rO   r   r   substitute_html5_raw)r   r   olds      r   'test_unambiguous_ampersands_not_escapedz>TestEntitySubstitution.test_unambiguous_ampersands_not_escaped  sL     x''//36666x,,V44>>>>>>r   zmarkup,html,html5,html5raw))&divide;&amp;divide;r   r   )z
&nonesuch;&amp;nonesuch;r   r   )z&#247;
&amp;#247;r   r   )z&#xa1;
&amp;#xa1;r   r   c                     | j                             |          |k    sJ | j                             |          |k    sJ | j                             |          |k    sJ d S rO   )r   r   substitute_html5r   )r   r   r   html5html5raws        r   'test_when_entity_ampersands_are_escapedz>TestEntitySubstitution.test_when_entity_ampersands_are_escaped  sm    , x''//47777x((00E9999x,,V44@@@@@@r   zmarkup,expect)z&nosuchentity;z&amp;nosuchentity;c                     | j                             |          |k    sJ | j                             |          |k    sJ d S rO   r   )r   r   expects      r   !test_ambiguous_ampersands_escapedz8TestEntitySubstitution.test_ambiguous_ampersands_escaped  sL     x''//69999x,,V44>>>>>>r   N)r8   r9   r:   r;   r   r<   r=   r>   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r?   r   r   r   r     s       ;;& & & [ S C	

 
A A
 
AB B B4
8 
8 
86 6 6L L LM M MY Y Y9 9 9G G G= = =O O O
 
 
6 6 6
 [		
 		
 		
 ? ? ? [$	
 	
 	
 A A A" [BC ? ? ? ? ?r   r   )r<   rT   rs   rR   r   
bs4.dammitr   r   r   objectr   rA   r   r?   r   r   <module>r      s      



               D0 D0 D0 D0 D0 D0 D0 D0NCH CH CH CH CH6 CH CH CHLV? V? V? V? V?V V? V? V? V? V?r   