
    Mh=                        d dl mZmZmZ d dlmZ d dlZd dlmZm	Z	 ddl
mZmZmZ ddl
mZmZmZ ddlmZmZ d d	lmZ d
                    e          dz   Z ej        dez   dz             Z ej        dez   dz             Zi Z ed          dk    Z e ej                              D ]v\  Z Z!er ee!          dk    ses ee!          dk    r(e!dk    rH ee!          dk    r ej"        e!          Z!n e#e!          Z!e!evse $                                re ee!<   wd Z% ede%           ddZ& G d de'          Z( G d de)          Z*dS )    )absolute_importdivisionunicode_literals)	text_typeN)register_errorxmlcharrefreplace_errors   )voidElementsbooleanAttributesspaceCharacters)rcdataElementsentitiesxmlEntities)treewalkers_utils)escape z"'=<>`[]u_    	
 /`  ᠎᠏               　]u   􏿿   &c           
      `   t          | t          t          f          rg }g }d}t          | j        | j        | j                           D ]\  }}|rd}
|| j        z   }t          j        | j        |t          | j        |dz   g                             r't          j
        | j        ||dz                      }d}nt          |          }|                    |           |D ]}t                              |          }	|	rU|                    d           |                    |	           |	                    d          s|                    d           s|                    dt!          |          dd          z             d                    |          | j        fS t%          |           S )NFr   Tr   ;z&#x%s;r   )
isinstanceUnicodeEncodeErrorUnicodeTranslateError	enumerateobjectstartendr   isSurrogatePairminsurrogatePairToCodepointordappend_encode_entity_mapgetendswithhexjoinr   )
excres
codepointsskipicindex	codepointcpes
             b/var/www/html/test/jupyter/venv/lib/python3.11/site-packages/bleach/_vendor/html5lib/serializer.pyhtmlentityreplace_errorsr6   *   s   #*,ABCC -
cj37):;<< 
	) 
	)DAq 	ME%cjsCGUQY;O7P7P1P&QRR #";CJuUUVY<WXX	FF	i(((( 	5 	5B"&&r**A 5

3

1zz# $JJsOOO

8s2wwqrr{34444cg&&',,,    htmlentityreplaceetreec                     t          j        |          }t          di |}|                     ||           |          S )a  Serializes the input token stream using the specified treewalker

    :arg input: the token stream to serialize

    :arg tree: the treewalker to use

    :arg encoding: the encoding to use

    :arg serializer_opts: any options to pass to the
        :py:class:`html5lib.serializer.HTMLSerializer` that gets created

    :returns: the tree serialized as a string

    Example:

    >>> from html5lib.html5parser import parse
    >>> from html5lib.serializer import serialize
    >>> token_stream = parse('<html><body><p>Hi!</p></body></html>')
    >>> serialize(token_stream, omit_optional_tags=False)
    '<html><head></head><body><p>Hi!</p></body></html>'

     )r   getTreeWalkerHTMLSerializerrender)inputtreeencodingserializer_optswalkerss         r5   	serializerE   K   sC    0 &t,,F))))A88FF5MM8,,,r7   c                   t    e Zd ZdZdZdZdZdZdZdZ	dZ
dZdZdZdZdZdZdZd Zd Zd Zdd
ZddZddZd	S )r=   legacy"TF)quote_attr_values
quote_charuse_best_quote_charomit_optional_tagsminimize_boolean_attributesuse_trailing_solidusspace_before_trailing_solidusescape_lt_in_attrsescape_rcdataresolve_entitiesalphabetical_attributesinject_meta_charsetstrip_whitespacesanitizec                 x   t          |          t          | j                  z
  }t          |          dk    r,t          dt	          t          |                    z            d|v rd| _        | j        D ]5}t          | ||                    |t          | |                               6g | _
        d| _        dS )aB
  Initialize HTMLSerializer

        :arg inject_meta_charset: Whether or not to inject the meta charset.

            Defaults to ``True``.

        :arg quote_attr_values: Whether to quote attribute values that don't
            require quoting per legacy browser behavior (``"legacy"``), when
            required by the standard (``"spec"``), or always (``"always"``).

            Defaults to ``"legacy"``.

        :arg quote_char: Use given quote character for attribute quoting.

            Defaults to ``"`` which will use double quotes unless attribute
            value contains a double quote, in which case single quotes are
            used.

        :arg escape_lt_in_attrs: Whether or not to escape ``<`` in attribute
            values.

            Defaults to ``False``.

        :arg escape_rcdata: Whether to escape characters that need to be
            escaped within normal elements within rcdata elements such as
            style.

            Defaults to ``False``.

        :arg resolve_entities: Whether to resolve named character entities that
            appear in the source tree. The XML predefined entities &lt; &gt;
            &amp; &quot; &apos; are unaffected by this setting.

            Defaults to ``True``.

        :arg strip_whitespace: Whether to remove semantically meaningless
            whitespace. (This compresses all whitespace to a single space
            except within ``pre``.)

            Defaults to ``False``.

        :arg minimize_boolean_attributes: Shortens boolean attributes to give
            just the attribute value, for example::

              <input disabled="disabled">

            becomes::

              <input disabled>

            Defaults to ``True``.

        :arg use_trailing_solidus: Includes a close-tag slash at the end of the
            start tag of void elements (empty elements whose end tag is
            forbidden). E.g. ``<hr/>``.

            Defaults to ``False``.

        :arg space_before_trailing_solidus: Places a space immediately before
            the closing slash in a tag using a trailing solidus. E.g.
            ``<hr />``. Requires ``use_trailing_solidus=True``.

            Defaults to ``True``.

        :arg sanitize: Strip all unsafe or unknown constructs from output.
            See :py:class:`html5lib.filters.sanitizer.Filter`.

            Defaults to ``False``.

        :arg omit_optional_tags: Omit start/end tags that are optional.

            Defaults to ``True``.

        :arg alphabetical_attributes: Reorder attributes to be in alphabetical order.

            Defaults to ``False``.

        r   z2__init__() got an unexpected keyword argument '%s'rJ   FN)	frozensetoptionslen	TypeErrornextiterrK   setattrr'   getattrerrorsstrict)selfkwargsunexpected_argsattrs       r5   __init__zHTMLSerializer.__init__   s    ^ $F++i.E.EE!##PSWX\]lXmXmSnSnnooo6!!',D$L 	G 	GDD$

4t1D1D E EFFFFr7   c                 x    t          |t                    sJ | j        r|                    | j        d          S |S )Nr8   r   r   rA   encoderb   strings     r5   ri   zHTMLSerializer.encode   s>    &),,---= 	==0CDDDMr7   c                 x    t          |t                    sJ | j        r|                    | j        d          S |S )Nra   rh   rj   s     r5   encodeStrictzHTMLSerializer.encodeStrict   s=    &),,---= 	==999Mr7   Nc              #     K   || _         d}g | _        |r| j        rddlm}  |||          }| j        rddlm}  ||          }| j        rddlm}  ||          }| j	        rddl
m}  ||          }| j        rddlm}  ||          }|D ]h}|d         }|dk    rd|d         z  }|d         r|d	|d         z  z  }n|d
         r|dz  }|d
         rj|d
                             d          dk    r7|d
                             d          dk    r|                     d           d}nd}|d||d
         |z  }|dz  }|                     |          V  |dv r|dk    s|rU|r4|d                             d          dk    r|                     d           |                     |d                   V  0|                     t#          |d                             V  \|dv r|d         }	|                     d|	z            V  |	t$          v r
| j        sd}n|r|                     d           |d                                         D ]
\  \  }
}}|}|}|                     d          V  |                     |          V  | j        rH|t-          j        |	t1                                vr|t-          j        dt1                                vr|                     d          V  | j        dk    st5          |          dk    rd}n_| j        dk    rt6                              |          d u}n7| j        dk    rt:                              |          d u}nt=          d           |                    d!d"          }| j         r|                    d#d$          }|r| j!        }| j"        rd|v rd|vrd}n
d|v rd|vrd}|dk    r|                    dd%          }n|                    dd&          }|                     |          V  |                     |          V  |                     |          V  |                     |          V  |	tF          v r=| j$        r6| j%        r|                     d'          V  n|                     d(          V  |                     d          V  2|d)k    rG|d         }	|	t$          v rd}n|r|                     d           |                     d*|	z            V  |d+k    rX|d         }|                    d,          dk    r|                     d-           |                     d.|d         z            V  |d/k    rj|d         }	|	d0z   }|tL          vr|                     d1|	z             | j'        r|tP          vrtL          |         }nd2|	z  }|                     |          V  M|                     |d                    jd S )3NFr	   )FiltertypeDoctypez<!DOCTYPE %snamepublicIdz PUBLIC "%s"systemIdz SYSTEMrH   r   'zBSystem identifier contains both single and double quote characters >)
CharactersSpaceCharactersry   dataz</zUnexpected </ in CDATA)StartTagEmptyTagz<%sTz+Unexpected child element of a CDATA elementr   =alwaysspecrG   z?quote_attr_values must be one of: 'always', 'spec', or 'legacy'r   z&amp;<z&lt;z&#39;z&quot;z //EndTagz</%s>Commentz--zComment contains --z	<!--%s-->Entityr   zEntity %s not recognizedz&%s;))rA   r`   rT   filters.inject_meta_charsetro   rS   filters.alphabeticalattributesrU   filters.whitespacerV   filters.sanitizerrL   filters.optionaltagsfindserializeErrorrm   ri   r   r   rQ   itemsrM   r   r'   tuplerI   rZ   _quoteAttributeSpecsearch_quoteAttributeLegacy
ValueErrorreplacerP   rJ   rK   r
   rN   rO   r   rR   r   )rb   
treewalkerrA   in_cdataro   tokenrp   doctyperJ   rr   _	attr_name
attr_valuekv
quote_attrrz   keys                     r5   rE   zHTMLSerializer.serialize   su       	60 	6;;;;;;
H55J ' 	,>>>>>>
++J   	,222222
++J= 	,111111
++J" 	,444444
++J l	3 l	3E=Dy  (5=8$ )~j0AAAGG:& )y(G$ WZ(--c22a77 ,11#66!;; //0tuuu%(

%(
GJJj8I8I::VVG3''000000:::,,,, FE&M$6$6t$<$<$A$A++,DEEE++eFm444444++fU6]&;&;<<<<<<111V}''55555>))$2D)#HH W''(UVVV27-2E2E2G2G &1 &1.NQ	J!A"A++C00000++A.....; 1"3"7egg"F"FFF"3"7EGG"D"DDD"//444441X==Q1)-JJ!3v==)<)C)CA)F)Fd)RJJ!3x??)>)E)Ea)H)HPT)TJJ", .M #N #N NIIc7332 7 !		#v 6 6A% 1)-J#7 5#&!88114JJ%(AXX#Q,,14J)S00$%IIc7$;$;$%IIc8$<$<"&"3"3J"?"????"&++a..000"&"3"3J"?"?????"&++a..000<''D,E'9 5"//555555"//44444kk#&&&&&&!!V}>))$HH W''(UVVV''$777777""V}99T??a''''(=>>>''eFm(CDDDDDD!!V}Sjh&&''(BT(IJJJ( )S-C-C#C=DD!D=D''------ ##E&M2222Yl	3 l	3r7   c                     |r6d                     t          |                     ||                              S d                     t          |                     |                              S )an  Serializes the stream from the treewalker into a string

        :arg treewalker: the treewalker to serialize

        :arg encoding: the string encoding to use

        :returns: the serialized tree

        Example:

        >>> from html5lib import parse, getTreeWalker
        >>> from html5lib.serializer import HTMLSerializer
        >>> token_stream = parse('<html><body>Hi!</body></html>')
        >>> walker = getTreeWalker('etree')
        >>> serializer = HTMLSerializer(omit_optional_tags=False)
        >>> serializer.render(walker(token_stream))
        '<html><head></head><body>Hi!</body></html>'

        r7   r   )r*   listrE   )rb   r   rA   s      r5   r>   zHTMLSerializer.renderw  sZ    (  	=88D
H!E!EFFGGG774z : :;;<<<r7   XXX ERROR MESSAGE NEEDEDc                 V    | j                             |           | j        rt          d S N)r`   r%   ra   SerializeError)rb   rz   s     r5   r   zHTMLSerializer.serializeError  s2    4   ; 	!  	! 	!r7   r   )r   )__name__
__module____qualname__rI   rJ   rK   rL   rM   rN   rO   rP   rQ   rR   rS   rT   rU   rV   rY   rf   ri   rm   rE   r>   r   r;   r7   r5   r=   r=   h   s         !J "& $(! M $H/GW W Wr    G3 G3 G3 G3R= = = =2! ! ! ! ! !r7   r=   c                       e Zd ZdZdS )r   zError in serialized treeN)r   r   r   __doc__r;   r7   r5   r   r     s        ""Dr7   r   )r9   N)+
__future__r   r   r   bleach.six_shimr   recodecsr   r   	constantsr
   r   r   r   r   r   r   r   r   xml.sax.saxutilsr   r*   _quoteAttributeSpecCharscompiler   r   r&   rZ   _is_ucs4r   r   r   r   r#   r$   islowerr6   rE   r   r=   	Exceptionr   r;   r7   r5   <module>r      s   B B B B B B B B B B % % % % % % 				 ; ; ; ; ; ; ; ; G G G G G G G G G G < < < < < < < < < < ! ! ! ! ! ! ! ! # # # # # #77?33i?  bj'?!?#!EFF "
3)A#A$-$- . .   3|!D!!"" & &DAq	 cc!ffqjj !!c!ffqjjCxx3q66Q;;//22AAAA&&&!))++&$%q!- - -< "$< = = =- - - -:l! l! l! l! l!V l! l! l!^		 	 	 	 	Y 	 	 	 	 	r7   