
    MhC\                        d Z ddlZddlZddlZ ej        dded           ddlmZmZ ddlm	Z	 dd	l
mZmZ dd
l
mZ ddlmZ ddlmZmZmZmZmZmZ ddlmZ ddlmZ ddlmZmZ ddlmZm Z  ddl!m"Z" e	j#        Z$ e"e$          Z%e	j&        d         e	j&        d         e	j&        d         hZ'e	j&        d         Z(e	j&        d         Z)e	j&        d         Z*e	j&        d         Z+ e,d          Z- e,d          Z. G d d          Z/ G d de           Z0 G d de          Z1d Z2d Z3d  Z4 ej5        d!          Z6d" Z7 G d# d$e          Z8dS )%z
Shim module between Bleach and html5lib. This makes it easier to upgrade the
html5lib library without having to change a lot of code.
    Nignorez"html5lib's sanitizer is deprecatedzbleach._vendor.html5lib)messagecategorymodule)
HTMLParsergetTreeWalker)	constants)
namespacesprefixes)_ReparseException)Filter)allowed_protocolsallowed_css_propertiesallowed_svg_propertiesattr_val_is_urisvg_attr_val_allows_refsvg_allow_local_href)HTMLInputStream)escapeHTMLSerializer)attributeMapHTMLTokenizer)TrieStartTagEndTagEmptyTag
Characters
ParseError)paabbraddressareaarticleasideaudiobbasebdibdo
blockquotebodybrbuttoncanvascaptioncitecodecolcolgroupdatadatalistdddeldetailsdfndialogdivdldtemembedfieldset
figcaptionfigurefooterformh1h2h3h4h5h6headheaderhgrouphrhtmliiframeimginputinskbdkeygenlabellegendlilinkmapmarkmenumetameternavnoscriptobjectoloptgroupoptionoutputpparampicturepreprogressqrprtrubyssampscriptsectionselectslotsmallsourcespanstrongstylesubsummarysuptabletbodytdtemplatetextareatfootththeadtimetitletrtrackuulvarvideowbr)!r!   r#   r$   r*   r8   r:   r6   r;   r<   r=   r@   rA   rB   rC   rD   rE   rF   rG   rH   rI   rJ   rL   rM   rN   rY   mainr`   rc   rg   rj   rs   r~   r   c                   z    e Zd ZdZd Zed             Zed             Zed             Zd Z	ddZ
d	 Zd
 Zd ZdS )InputStreamWithMemoryzWraps an HTMLInputStream to remember characters since last <

    This wraps existing HTMLInputStream classes to keep track of the stream
    since the last < which marked an open tag state.

    c                 f    || _         | j         j        | _        | j         j        | _        g | _        d S N)_inner_streamresetposition_buffer)selfinner_streams     T/var/www/html/test/jupyter/venv/lib/python3.11/site-packages/bleach/html5lib_shim.py__init__zInputStreamWithMemory.__init__   s/    )'-
*3    c                     | j         j        S r   )r   errorsr   s    r   r   zInputStreamWithMemory.errors  s    !((r   c                     | j         j        S r   )r   charEncodingr   s    r   r   z"InputStreamWithMemory.charEncoding  s    !..r   c                     | j         j        S r   )r   changeEncodingr   s    r   r   z$InputStreamWithMemory.changeEncoding
  s    !00r   c                 p    | j                                         }|r| j                            |           |S r   )r   charr   append)r   cs     r   r   zInputStreamWithMemory.char  s9    ##%% 	#L"""r   Fc                     | j                             ||          }| j                            t	          |                     |S )N)opposite)r   
charsUntilr   extendlist)r   
charactersr   charss       r   r   z InputStreamWithMemory.charsUntil  s>    "--j8-LLDKK(((r   c                 x    | j         r| j                             d           | j                            |          S )N)r   popr   unget)r   r   s     r   r   zInputStreamWithMemory.unget  s9    < 	!LR   !''---r   c                 6    d                     | j                  S )zReturns the stream history since last '<'

        Since the buffer starts at the last '<' as as seen by tagOpenState(),
        we know that everything from that point to when this method is called
        is the "tag" that is being tokenized.

         )joinr   r   s    r   get_tagzInputStreamWithMemory.get_tag  s     wwt|$$$r   c                     dg| _         dS )zResets stream history to just '<'

        This gets called by tagOpenState() which marks a '<' that denotes an
        open tag. Any time we see that, we reset the buffer.

        <N)r   r   s    r   	start_tagzInputStreamWithMemory.start_tag)  s     ur   NF)__name__
__module____qualname____doc__r   propertyr   r   r   r   r   r   r   r    r   r   r   r      s            ) ) X) / / X/ 1 1 X1     
. . .
% % %    r   r   c                   N     e Zd ZdZd	 fd	Z fdZd
 fd	Z fdZ fdZ xZ	S )BleachHTMLTokenizerz1Tokenizer that doesn't consume character entitiesFc                      t                      j        di | || _        t          | j                  | _        d | _        d S )Nr   )superr   consume_entitiesr   streamemitted_last_token)r   r   kwargs	__class__s      r   r   zBleachHTMLTokenizer.__init__6  sK    ""6""" 0 ,DK88 #'r   c              #   z  K   d }t                                                      D ]+}||d         dk    r_|d         t          v rP|                    d          r;t	          d |d                                         D                       |d<   d }|V  n|d         dk    rq| j        j        e|d                                         	                                | j        j        vr-| j
                                        |d<   t          |d<   d }|V  n"|d         t          k    r|V  |}n
|V  |V  d }|d         t          k    r|}'|V  -|rf|d         dk    r%t          | j
                                        dV  d S |d         dv r%t          | j
                                        dV  d S |V  d S d S )	Nr4   z#invalid-character-in-attribute-nametypec              3   <   K   | ]\  }}d |vrd|vr
d|v||fV  dS )"'r   Nr   ).0	attr_name
attr_values      r   	<genexpr>z/BleachHTMLTokenizer.__iter__.<locals>.<genexpr>Q  sZ       1 11Izy00 #9 4 4 #9 4 4 #J/
 !5 4 4 41 1r   z!expected-closing-tag-but-got-charzeof-in-tag-namer   r4   )zduplicate-attributezeof-in-attribute-namez eof-in-attribute-value-no-quoteszexpected-end-of-tag-but-got-eof)r   __iter__TAG_TOKEN_TYPESgetr   itemsparsertagslowerstripr   r   TAG_TOKEN_TYPE_CHARACTERSTAG_TOKEN_TYPE_PARSEERROR)r   last_error_tokentokenr   s      r   r   zBleachHTMLTokenizer.__iter__A  sH     WW%%'' B	 B	E+$V,0UUUf88		&)) 9 %1 1 15:6]5H5H5J5J1 1 1 % %E&M (,$KKKK %V,0SSS(4f++--3355T[=MMM %)K$7$7$9$9E&M$=E&M'+$KKKK6]&??? +***',$$ +***KKK'+$ V} 999#( KKKK 	''+<<<
  9$+BUBUBWBWXXXXXXX!&) .     9$+BUBUBWBWXXXXXXX '&&&&&7	' 	'r   Nc                     | j         r"t                                          ||          S |r#| j        d         d         dxx         dz  cc<   d S | j                            t          dd           d S )Nr4   r      &r   )r   r   consumeEntitycurrentToken
tokenQueuer   r   )r   allowedCharfromAttributer   s      r   r   z!BleachHTMLTokenizer.consumeEntity  s       	E77((mDDD  	Uf%b)!,,,3,,,,, O"",Es#S#STTTTTr   c                 v    | j                                          t                                                      S r   )r   r   r   tagOpenState)r   r   s    r   r   z BleachHTMLTokenizer.tagOpenState  s/    
 	ww##%%%r   c                 8   | j         }| j        j        |d         t          v r|d                                         | j        j        vr| j        j        r?| j        r5|d         t          k    r$|d                                         t          v rd}nd}n| j	        
                                }t          |d}|x| _         | _        | j                            |           | j        | _        d S | j         | _        t!                                                       d S )Nr   name
r   r   )r   r   r   r   r   r   r   TAG_TOKEN_TYPE_STARTHTML_TAGS_BLOCK_LEVELr   r   r   r   r   	dataStatestater   emitCurrentToken)r   r   new_data	new_tokenr   s       r   r   z$BleachHTMLTokenizer.emitCurrentToken  s   ! K(f00f##%%T[-===
 {  1+"f)===f++--1FFF
  $HH  "HH  ;..00!:HMMI:CCD 7O""9---DJF"&"3  """""r   r   )NF)
r   r   r   r   r   r   r   r   r   __classcell__r   s   @r   r   r   3  s        ;;	' 	' 	' 	' 	' 	'b' b' b' b' b'HU U U U U U$& & & & &*# *# *# *# *# *# *# *# *#r   r   c                   ,     e Zd ZdZ fdZ	 ddZ xZS )BleachHTMLParserz$Parser that uses BleachHTMLTokenizerc                     |t          d |D                       nd| _        || _        || _         t	                      j        di | dS )a  
        :arg tags: set of allowed tags--everything else is either stripped or
            escaped; if None, then this doesn't look at tags at all
        :arg strip: whether to strip disallowed tags (True) or escape them (False);
            if tags=None, then this doesn't have any effect
        :arg consume_entities: whether to consume entities (default behavior) or
            leave them as is when tokenizing (BleachHTMLTokenizer-added behavior)

        Nc              3   >   K   | ]}|                                 V  d S r   )r   )r   tags     r   r   z,BleachHTMLParser.__init__.<locals>.<genexpr>  s*      33ssyy{{333333r   r   )	frozensetr   r   r   r   r   )r   r   r   r   r   r   s        r   r   zBleachHTMLParser.__init__  sd     9=8HI33d333444d 		 
 0""6"""""r   Fr;   Tc                 (   || _         || _        || _        t          d|| j        | d|| _        |                                  	 |                                  d S # t          $ r, |                                  |                                  Y d S w xY w)N)r   r   r   r   )	innerHTMLMode	container	scriptingr   r   	tokenizerr   mainLoopReparseException)r   r   	innerHTMLr   r   r   s         r   _parsezBleachHTMLParser._parse   s     '"", 
D,A$
 
RX
 
 	

	MMOOOOO 	 	 	JJLLLMMOOOOOO	s   A 2BB)Fr;   T)r   r   r   r   r   r  r   r   s   @r   r   r     sY        ..# # # # #$ CG       r   r   c                 2   | d         dk    rqt          |           dk     rdS | d         dv r| dd         d}}n| dd         d}}|d	k    rdS t          ||          }d|cxk     rd
k     rn nt          |          S dS t                              | d          S )a9  Convert an entity (minus the & and ; part) into what it represents

    This handles numeric, hex, and text entities.

    :arg value: the string (minus the ``&`` and ``;`` part) to convert

    :returns: unicode character or None if it's an ambiguous ampersand that
        doesn't match a character entity

    r   #   Nr   xX   
   r   i   )lenintchrENTITIESr   )valueint_as_stringr'   
code_points       r   convert_entityr    s     Qx3u::>>48z!!"')R4MM #()R4MB4--
z$$$$H$$$$$z??"4<<t$$$r   c                    d| vr| S g }t          |           D ]}|s|                    d          rit          |          }|Xt          |          }|G|                    |           |t          |          dz   d         }|r|                    |           |                    |           d                    |          S )zConverts all found entities in the text

    :arg text: the text to convert entities in

    :returns: unicode text with converted entities

    r   Nr  r   )next_possible_entity
startswithmatch_entityr  r   r  r   )textnew_textpartentity	converted	remainders         r   convert_entitiesr  :  s     $H$T**   	??3 	!$''F!*622	 (OOI... $S[[1_%6%6 7I 3 	222778r   c                    | d         dk    rt          d          | dd         } t          |           } d}dt          j        z   }| r| d         dk    rd}|                     d           | r%| d         d	v rd
}||                     d          z  }nd}| r5| d         |vr+|                     d          }||vrn||z  }| r
| d         |v+|r| r| d         dk    r|S dS | rL| d         |vrB|                     d          }||z  }t
                              |          sdS | r
| d         |vB|r| r| d         dk    r|S dS )av  Returns first entity in stream or None if no entity exists

    Note: For Bleach purposes, entities must start with a "&" and end with a
    ";". This ignores ambiguous character entities that have no ";" at the end.

    :arg stream: the character stream

    :returns: the entity string without "&" or ";" if it's a valid character
        entity; ``None`` otherwise

    r   r   zStream should begin with "&"r   Nr   z<&=;r  r  0123456789abcdefABCDEF
0123456789;)
ValueErrorr   string
whitespacer   ENTITIES_TRIEhas_keys_with_prefix)r   possible_entityend_charactersallowedr   s        r   r  r  ]  s    ayC7888ABBZF&\\FOf//N  &)s""

1 	#fQi:--.Gvzz!}},OO"G  	!.88

1Aq O	  	!.88  	#v 	#&)s*:*:""t  VAYn44JJqMM111/BB 	 4  VAYn44  6 fQi3&6&64r   z(&)c              #      K   t          t                              |                     D ] \  }}|dk    r|V  |dz  dk    rd|z   V  !dS )zTakes a text and generates a list of possible entities

    :arg text: the text to look at

    :returns: generator where each part (except the first) starts with an
        "&"

    r   r  r   N)	enumerateAMP_SPLIT_REsplit)r  rP   r  s      r   r  r    sm       \//5566  466JJJJUaZZ*	 r   c                   .     e Zd ZdZdZd Zd fd	Z xZS )BleachHTMLSerializerz[HTMLSerializer that undoes & -> &amp; in attributes and sets
    escape_rcdata to True
    Tc              #   L  K   |                     dd          }t          |          D ]{}|s|                    d          rIt          |          }|8t	          |          )d| dV  |t          |          dz   d         }|r|V  c|                     dd          V  |dS )z,Escapes just bare & in HTML attribute valuesz&amp;r   Nr#  r  )replacer  r  r  r  r  )r   stokenr  r  s       r   escape_base_ampz$BleachHTMLSerializer.escape_base_amp  s       -- )00 	- 	-D s## %d++ %.*@*@*L'f---'''  Fa 1 12D #"


,,sG,,,,,,%	- 	-r   Nc              #     K   d}d}t                                          ||          D ]\}|r=|dk    rd}n/|r%|dk    r|                     |          E d{V  d}3n|dk    rd}|V  A|                    d          rd}|V  ]dS )zWrap HTMLSerializer.serialize and conver & to &amp; in attribute values

        Note that this converts & to &amp; in attribute values where the & isn't
        already part of an unambiguous character entity.

        F>r   N=Tr   )r   	serializer5  r  )r   
treewalkerencodingin_tagafter_equalsr4  r   s         r   r9  zBleachHTMLSerializer.serialize  s       gg''
H== 	 	F S=="FF! (}}#'#7#7#?#????????', 	 % s]]#'L$$S)) "!F'	 	r   r   )r   r   r   r   escape_rcdatar5  r9  r   r   s   @r   r1  r1    s]          M- - ->         r   r1  )9r   rer%  warningsfilterwarningsDeprecationWarningbleach._vendor.html5libr   r   r	   !bleach._vendor.html5lib.constantsr
   r   r   r  $bleach._vendor.html5lib.filters.baser   )bleach._vendor.html5lib.filters.sanitizerr   r   r   r   r   r   SanitizerFilter$bleach._vendor.html5lib._inputstreamr   "bleach._vendor.html5lib.serializerr   r   "bleach._vendor.html5lib._tokenizerr   r   bleach._vendor.html5lib._trier   entitiesr  r'  
tokenTypesr   r   TAG_TOKEN_TYPE_ENDr   r   r   	HTML_TAGSr   r   r   r   r  r  r  compiler.  r  r1  r   r   r   <module>rQ     s   
 
			    0$	                                                                              X $"$
 !+J7 )(3 %0> %0> 
 Iqs s	r "	"$ $ N< < < < < < < <~v# v# v# v# v#- v# v# v#r) ) ) ) )z ) ) )X% % %D     F9 9 9x rz%     I I I I I> I I I I Ir   