
    MhU                        d dl mZ d dlZd dlZd dlmZ d dlmZ d dlmZ  e	d          Z
ddgdgdgd	Z e	d
          Zd                    d  e ed d           edd           edd                    D                       Z ej        dez   dz   ej                  ZdZ G d de          Z G d d          Zd Z G d dej                  ZdS )    )chainN)unescape)html5lib_shim)
parse_shim)aabbracronymb
blockquotecodeemiliolstrongulhreftitle)r   r   r	   )httphttpsmailto c                 ,    g | ]}t          |          S  )chr).0cs     P/var/www/html/test/jupyter/venv/lib/python3.11/site-packages/bleach/sanitizer.py
<listcomp>r   *   s    FFFSVVFFF    	                []?c                       e Zd ZdS )NoCssSanitizerWarningN)__name__
__module____qualname__r   r    r   r*   r*   5   s        Dr    r*   c                   .    e Zd ZdZeeeddddfdZd ZdS )Cleanera  Cleaner for cleaning HTML fragments of malicious content

    This cleaner is a security-focused function whose sole purpose is to remove
    malicious content from a string such that it can be displayed as content in
    a web page.

    To use::

        from bleach.sanitizer import Cleaner

        cleaner = Cleaner()

        for text in all_the_yucky_things:
            sanitized = cleaner.clean(text)

    .. Note::

       This cleaner is not designed to use to transform content to be used in
       non-web-page contexts.

    .. Warning::

       This cleaner is not thread-safe--the html parser has internal state.
       Create a separate cleaner per thread!


    FTNc                 f   || _         || _        || _        || _        || _        |pg | _        || _        t          j        | j         | j        dd          | _	        t          j
        d          | _        t          j        dddddd          | _        |g }t          |t                    r|}n_t          |t                     rJg }|                                D ]3}	t          |	t          t$          f          r|                    |	           4d|v rt)          j        d	t,          
           dS dS dS )a:  Initializes a Cleaner

        :arg set tags: set of allowed tags; defaults to
            ``bleach.sanitizer.ALLOWED_TAGS``

        :arg dict attributes: allowed attributes; can be a callable, list or dict;
            defaults to ``bleach.sanitizer.ALLOWED_ATTRIBUTES``

        :arg list protocols: allowed list of protocols for links; defaults
            to ``bleach.sanitizer.ALLOWED_PROTOCOLS``

        :arg bool strip: whether or not to strip disallowed elements

        :arg bool strip_comments: whether or not to strip HTML comments

        :arg list filters: list of html5lib Filter classes to pass streamed content through

            .. seealso:: http://html5lib.readthedocs.io/en/latest/movingparts.html#filters

            .. Warning::

               Using filters changes the output of ``bleach.Cleaner.clean``.
               Make sure the way the filters change the output are secure.

        :arg CSSSanitizer css_sanitizer: instance with a "sanitize_css" method for
            sanitizing style attribute values and style text; defaults to None

        F)tagsstripconsume_entitiesnamespaceHTMLElementsetreealwaysT)quote_attr_valuesomit_optional_tagsescape_lt_in_attrsresolve_entitiessanitizealphabetical_attributesNstylez7'style' attribute specified, but css_sanitizer not set.)category)r1   
attributes	protocolsr2   strip_commentsfilterscss_sanitizerr   BleachHTMLParserparsergetTreeWalkerwalkerBleachHTMLSerializer
serializer
isinstancelistdictvaluestupleextendwarningswarnr*   )
selfr1   r?   r@   r2   rA   rB   rC   attributes_valuesrM   s
             r   __init__zCleaner.__init__V   st   L 	$"
,}"*#4*""'	
 
 
 $1'::'<&$# #$)
 
 
   !#*d++ 9$.!!J-- 9$&!(//11 9 9F!&4-88 9)00888+++M2      !  ,+r    c           	         t          |t                    s"d|j        j        ddz   }t	          |          |sdS | j                            |          }t          |                     |          | j	        | j
        | j        | j        | j        | j                  }| j        D ]} ||          }| j                            |          S )zCleans text and returns sanitized result as unicode

        :arg str text: text to be cleaned

        :returns: sanitized text as unicode

        :raises TypeError: if ``text`` is not a text type

        zargument cannot be of z type, zmust be of text typer   )sourceallowed_tagsr?   strip_disallowed_tagsstrip_html_commentsrC   allowed_protocols)rV   )rJ   str	__class__r+   	TypeErrorrE   parseFragmentBleachSanitizerFilterrG   r1   r?   r2   rA   rC   r@   rB   rI   render)rR   textmessagedomfilteredfilter_classs         r   cleanzCleaner.clean   s     $$$ 	%K)@KKK()  G$$$ 	2k''--(;;s##"&* $ 3,"n
 
 
 !L 	5 	5L#|8444HH%%h///r    )	r+   r,   r-   __doc__ALLOWED_TAGSALLOWED_ATTRIBUTESALLOWED_PROTOCOLSrT   rf   r   r    r   r/   r/   9   s]         < %#S S S Sj#0 #0 #0 #0 #0r    r/   c                      t                     r S t           t                    r fd}|S t           t                    r fd}|S t	          d          )a0  Generates attribute filter function for the given attributes value

    The attributes value can take one of several shapes. This returns a filter
    function appropriate to the attributes value. One nice thing about this is
    that there's less if/then shenanigans in the ``allow_token`` method.

    c                     | v r*|          }t          |          r || ||          S ||v rdS dv r(d         }t          |          r || ||          S ||v S dS )NT*F)callable)tagattrvalueattr_valr?   s       r   _attr_filterz.attribute_filter_factory.<locals>._attr_filter   s    j  %c?H%% 6#8Cu5558##4j  %c?H%% 6#8Cu555x''5r    c                     |v S Nr   )ro   rp   rq   r?   s      r   rs   z.attribute_filter_factory.<locals>._attr_filter   s    :%%r    z3attributes needs to be a callable, a list or a dict)rn   rJ   rL   rK   
ValueError)r?   rs   s   ` r   attribute_filter_factoryrw      s     
 *d## 	 	 	 	 	$ *d## 	& 	& 	& 	& 	& 
J
K
KKr    c            	       z    e Zd ZdZeeeej        ej	        ej
        dddf	dZd Zd Zd Zd	 Zd
 Zd Zd Zd ZdS )r_   zmhtml5lib Filter that sanitizes text

    This filter can be used anywhere html5lib filters can be used.

    FTNc                    t           j                            | |           t          |          | _        t          |          | _        t          |          | _        || _        |	| _	        || _
        || _        |
| _        || _        dS )a_  Creates a BleachSanitizerFilter instance

        :arg source: html5lib TreeWalker stream as an html5lib TreeWalker

        :arg set allowed_tags: set of allowed tags; defaults to
            ``bleach.sanitizer.ALLOWED_TAGS``

        :arg dict attributes: allowed attributes; can be a callable, list or dict;
            defaults to ``bleach.sanitizer.ALLOWED_ATTRIBUTES``

        :arg list allowed_protocols: allowed list of protocols for links; defaults
            to ``bleach.sanitizer.ALLOWED_PROTOCOLS``

        :arg attr_val_is_uri: set of attributes that have URI values

        :arg svg_attr_val_allows_ref: set of SVG attributes that can have
            references

        :arg svg_allow_local_href: set of SVG elements that can have local
            hrefs

        :arg bool strip_disallowed_tags: whether or not to strip disallowed
            tags

        :arg bool strip_html_comments: whether or not to strip HTML comments

        :arg CSSSanitizer css_sanitizer: instance with a "sanitize_css" method for
            sanitizing style attribute values and style text; defaults to None

        N)r   FilterrT   	frozensetrW   rZ   rw   attr_filterrX   rY   attr_val_is_urisvg_attr_val_allows_refrC   svg_allow_local_href)rR   rV   rW   r?   rZ   r}   r~   r   rX   rY   rC   s              r   rT   zBleachSanitizerFilter.__init__  s    ` 	%%dF333%l33!*+<!=!=3J??%:"#6 .'>$*$8!!!r    c              #      K   |D ]<}|                      |          }|st          |t                    r	|E d {V  8|V  =d S ru   )sanitize_tokenrJ   rK   )rR   token_iteratortokenrets       r   sanitize_streamz%BleachSanitizerFilter.sanitize_streamA  sm      # 		 		E%%e,,C #t$$ 						 		r    c              #   J  K   g }|D ]u}|rK|d         dk    r|                     |           &d                    d |D                       dd}g }|V  n"|d         dk    r|                     |           q|V  vd                    d |D                       dd}|V  dS )z/Merge consecutive Characters tokens in a streamtype
Charactersr   c                     g | ]
}|d          S datar   r   
char_tokens     r   r   z:BleachSanitizerFilter.merge_characters.<locals>.<listcomp>[      TTTJZ/TTTr    )r   r   c                     g | ]
}|d          S r   r   r   s     r   r   z:BleachSanitizerFilter.merge_characters.<locals>.<listcomp>i  r   r    N)appendjoin)rR   r   characters_bufferr   	new_tokens        r   merge_charactersz&BleachSanitizerFilter.merge_charactersM  s     # 	 	E  =L00%,,U333
 !#TTBSTTT! ! !-	! !I )+%#OOOOv,..!((///KKKK GGTTBSTTTUU 
 
	 r    c                     |                      |                     t          j                            |                               S ru   )r   r   r   rz   __iter__)rR   s    r   r   zBleachSanitizerFilter.__iter__n  s<    $$  !5!>!>t!D!DEE
 
 	
r    c                 >   |d         }|dv rB|d         | j         v r|                     |          S | j        rdS |                     |          S |dk    r-| j        s$t          j        |d         ddd	
          |d<   |S dS |dk    r|                     |          S |S )a  Sanitize a token either by HTML-encoding or dropping.

        Unlike sanitizer.Filter, allowed_attributes can be a dict of {'tag':
        ['attribute', 'pairs'], 'tag': callable}.

        Here callable is a function with two arguments of attribute name and
        value. It should return true of false.

        Also gives the option to strip tags instead of encoding.

        :arg dict token: token to sanitize

        :returns: token or list of tokens

        r   )StartTagEndTagEmptyTagnameNCommentr   z&quot;z&#x27;)"')entitiesr   )rW   allow_tokenrX   disallowed_tokenrY   r   escapesanitize_characters)rR   r   
token_types      r   r   z$BleachSanitizerFilter.sanitize_tokens  s      6]
;;;V} 111''...+ 4t ,,U3339$$+  - 4&M(,J,J! ! !f t<''++E222 Lr    c                 &   |                     dd          }|s|S t                              t          |          }||d<   d|vr|S g }t	          j        |          D ]}|s|                    d          rt	          j        |          }|l|dk    r|                    ddd           n|                    d|d	           |t          |          d
z   d         }|r|                    d|d           |                    d|d           |S )a  Handles Characters tokens

        Our overridden tokenizer doesn't do anything with entities. However,
        that means that the serializer will convert all ``&`` in Characters
        tokens to ``&amp;``.

        Since we don't want that, we extract entities here and convert them to
        Entity tokens so the serializer will let them be.

        :arg token: the Characters token to work on

        :returns: a list of tokens

        r   r   &Nampr   )r   r   Entity)r   r      )
getINVISIBLE_CHARACTERS_REsubINVISIBLE_REPLACEMENT_CHARr   next_possible_entity
startswithmatch_entityr   len)rR   r   r   
new_tokenspartentity	remainders          r   r   z)BleachSanitizerFilter.sanitize_characters  sZ    yy$$ 	L&**+EtLLf d??L
 "6t<< 	D 	DD s## &3D99% #))<*M*MNNNN"))8V*L*LMMM !%S[[1_%6%6 7I  U"))<*S*STTT|TBBCCCCr    c                    t          j        |          }t          j        dd|          }|                    dd          }|                                }	 t          j        |          }n# t          $ r Y dS w xY w|j	        r|j	        |v r|S nD|
                    d          r|S d|v r|                    d          d         |v r|S d|v sd	|v r|S dS )
zChecks a uri value to see if it's allowed

        :arg value: the uri value to sanitize
        :arg allowed_protocols: list of allowed protocols

        :returns: allowed value or None

        z[`\000-\040\177-\240\s]+r   u   �N#:r   r   r   )r   convert_entitiesrer   replacelowerr   urlparserv   schemer   split)rR   rq   rZ   normalized_uriparseds        r   sanitize_uri_valuez(BleachSanitizerFilter.sanitize_uri_value  s$    '7>>  ;RPP (//"== (--//	  (88FF 	 	 	44	 = 	} 111 2
 ((--  ~%%"((--a04EEE ***g9J.J.Jts   A+ +
A98A9c                 `   d|v r(i }|d                                          D ]\  }}|\  }}|                     |d         ||          s)|| j        v r |                     || j                  }|P|}|| j        v r<t          j        ddt          |                    }|	                                }|s|}d|d         f| j
        v r.|dt          j        d         dffv rt          j        d	|          r|d
k    r$| j        r| j                            |          }nd}|||<   ||d<   |S )z-Handles the case where we're allowing the tagr   r   Nzurl\s*\(\s*[^#\s][^)]+?\) )Nr   xlinkr   z
^\s*[^#\s])Nr=   r   )itemsr|   r}   r   rZ   r~   r   r   r   r2   r   r   
namespacessearchrC   sanitize_css)	rR   r   attrsnamespaced_nameval	namespacer   	new_valuenew_vals	            r   r   z!BleachSanitizerFilter.allow_token  s   U?? E(-f(;(;(=(= 5- 5-$"1	4 ''ftSAA  #d&::: $ 7 7T=S T TI ( #C #d&BBB f%A3QTVVG%mmooG" & 
 & %-(D,EEE&&&1':FC+   9]C88 %$ #o55) 	!"0==cBB ! *-o&&!E&Mr    c                    |d         }|dk    rd|d          d|d<   n|d         r|dv sJ g }|d                                          D ]V\  \  }}}|r|s||}}||t          j        vr|}nt          j        |          d| }|                    d	| d
| d           Wd|d          d                    |           d|d<   nd|d          d|d<   |                    d          r|d         d d          d|d<   d|d<   |d= |S )Nr   r   z</r   >r   )r   r   r   r   z="r   <r   selfClosingz/>r   )r   r   prefixesr   r   r   )rR   r   r   r   nsr   vr   s           r   r   z&BleachSanitizerFilter.disallowed_tokenZ  s   6]
!!1v111E&MM6] 	1!99999E!&v!4!4!6!6 : :
TA  (d (#RB :=+A!A!A&*OO)6)?)C&L&Ld&L&LO
 888A8889999@f@rwwu~~@@@E&MM 1f000E&M99]## 	6$V}SbS1555E&M$f&Mr    )r+   r,   r-   rg   rh   ri   rj   r   r}   r~   r   rT   r   r   r   r   r   r   r   r   r   r    r   r_   r_      s          "%+%5 - E*?# <9 <9 <9 <9|
 
 
  B
 
 

) ) )V; ; ;z8 8 8tC C CJ$ $ $ $ $r    r_   )	itertoolsr   r   rP   xml.sax.saxutilsr   bleachr   r   r{   rh   ri   rj   r   rangeINVISIBLE_CHARACTERScompileUNICODEr   r   UserWarningr*   r/   rw   SanitizerFilterr_   r   r    r   <module>r      s         				  % % % % % %                   y ( '	Iy   I9::  wwFFUU55A;;b"uuR}}EEFFF  
 %"*S+?%?#%ErzRR  ! 	 	 	 	 	K 	 	 	U0 U0 U0 U0 U0 U0 U0 U0p(L (L (LVB B B B BM9 B B B B Br    