
import re

from urllib.parse import quote

from bleach import callbacks as linkify_callbacks
from bleach import html5lib_shim


#: List of default callbacks run against each link's attributes
DEFAULT_CALLBACKS = [linkify_callbacks.nofollow]


TLDS = """ac ad ae aero af ag ai al am an ao aq ar arpa as asia at au aw ax az
       ba bb bd be bf bg bh bi biz bj bm bn bo br bs bt bv bw by bz ca cat
       cc cd cf cg ch ci ck cl cm cn co com coop cr cu cv cx cy cz de dj dk
       dm do dz ec edu ee eg er es et eu fi fj fk fm fo fr ga gb gd ge gf gg
       gh gi gl gm gn gov gp gq gr gs gt gu gw gy hk hm hn hr ht hu id ie il
       im in info int io iq ir is it je jm jo jobs jp ke kg kh ki km kn kp
       kr kw ky kz la lb lc li lk lr ls lt lu lv ly ma mc md me mg mh mil mk
       ml mm mn mo mobi mp mq mr ms mt mu museum mv mw mx my mz na name nc ne
       net nf ng ni nl no np nr nu nz om org pa pe pf pg ph pk pl pm pn post
       pr pro ps pt pw py qa re ro rs ru rw sa sb sc sd se sg sh si sj sk sl
       sm sn so sr ss st su sv sx sy sz tc td tel tf tg th tj tk tl tm tn to
       tp tr travel tt tv tw tz ua ug uk us uy uz va vc ve vg vi vn vu wf ws
       xn xxx ye yt yu za zm zw""".split()

# Reverse the list so that longer tlds (e.g. "com") come before their
# prefixes (e.g. "co") in the generated alternations
TLDS.reverse()


def build_url_re(tlds=TLDS, protocols=html5lib_shim.allowed_protocols):
    """Builds the url regex used by linkifier

    If you want a different set of tlds or allowed protocols, pass those in
    and stomp on the existing ``url_re``::

        from bleach import linkifier

        my_url_re = linkifier.build_url_re(my_tlds_list, my_protocols)

        linker = LinkifyFilter(url_re=my_url_re)

    """
    return re.compile(
        r"""\(*  # Match any opening parentheses.
        \b(?<![@.])(?:(?:{0}):/{{0,3}}(?:(?:\w+:)?\w+@)?)?  # http://
        ([\w-]+\.)+(?:{1})(?:\:[0-9]+)?(?!\.\w)\b   # xx.yy.tld(:##)?
        (?:[/?][^\s\{{\}}\|\\\^`<>"]*)?
            # /path/zz (excluding "unsafe" chars from RFC 3986,
            # except for # and ~, which happen in practice)
        """.format(
            "|".join(sorted(protocols)), "|".join(sorted(tlds))
        ),
        re.IGNORECASE | re.VERBOSE | re.UNICODE,
    )


URL_RE = build_url_re()

PROTO_RE = re.compile(r"^[\w-]+:/{0,3}", re.IGNORECASE)


def build_email_re(tlds=TLDS):
    """Builds the email regex used by linkifier

    If you want a different set of tlds, pass those in and stomp on the existing ``email_re``::

        from bleach import linkifier

        my_email_re = linkifier.build_email_re(my_tlds_list)

        linker = LinkifyFilter(email_re=my_email_re)

    """
    # open and closing braces are doubled below because of the format() call
    return re.compile(
        r"""(?<!//)
        (([-!#$%&'*+/=?^_`{{}}|~0-9A-Z]+
            (\.[-!#$%&'*+/=?^_`{{}}|~0-9A-Z]+)*  # dot-atom
        |^"([\001-\010\013\014\016-\037!#-\[\]-\177]
            |\\[\001-\011\013\014\016-\177])*"  # quoted-string
        )@(?:[A-Z0-9](?:[A-Z0-9-]{{0,61}}[A-Z0-9])?\.)+(?:{0}))  # domain
        """.format(
            "|".join(tlds)
        ),
        re.IGNORECASE | re.MULTILINE | re.VERBOSE,
    )


EMAIL_RE = build_email_re()


class Linker:
    """Convert URL-like strings in an HTML fragment to links

    This function converts strings that look like URLs, domain names and email
    addresses in text that may be an HTML fragment to links, while preserving:

    1. links already in the string
    2. urls found in attributes
    3. email addresses

    linkify does a best-effort approach and tries to recover from bad
    situations due to crazy text.

    """

    def __init__(
        self,
        callbacks=DEFAULT_CALLBACKS,
        skip_tags=None,
        parse_email=False,
        url_re=URL_RE,
        email_re=EMAIL_RE,
        recognized_tags=html5lib_shim.HTML_TAGS,
    ):
        """Creates a Linker instance

        :arg list callbacks: list of callbacks to run when adjusting tag attributes;
            defaults to ``bleach.linkifier.DEFAULT_CALLBACKS``

        :arg set skip_tags: set of tags that you don't want to linkify the
            contents of; for example, you could set this to ``{'pre'}`` to skip
            linkifying contents of ``pre`` tags; ``None`` means you don't
            want linkify to skip any tags

        :arg bool parse_email: whether or not to linkify email addresses

        :arg url_re: url matching regex

        :arg email_re: email matching regex

        :arg set recognized_tags: the set of tags that linkify knows about;
            everything else gets escaped

        :returns: linkified text as unicode

        F)tagsstripconsume_entitiesnamespaceHTMLElementsetreealways)quote_attr_valuesomit_optional_tagsresolve_entitiessanitizealphabetical_attributesN)r   	skip_tagsparse_emailurl_reemail_rer   BleachHTMLParser	frozensetparsergetTreeWalkerwalkerBleachHTMLSerializer
serializer)selfr   r'   r(   r)   r*   recognized_tagss          r   __init__zLinker.__init__m   s    > #"&  $4?++""'	
 
 
 $1'::'<&$ #$)

 

 

r   c                 <   t          |t                    st          d          |sdS | j                            |          }t          |                     |          | j        | j        | j	        | j
        | j                  }| j                            |          S )zLinkify specified text

        :arg str text: the text to add links to

        :returns: linkified text as unicode

        :raises TypeError: if ``text`` is not a text type

        zargument must be of text type )sourcer   r'   r(   r)   r*   )
isinstancestr	TypeErrorr-   parseFragmentLinkifyFilterr/   r   r'   r(   r)   r*   r1   render)r2   textdomfiltereds       r   linkifyzLinker.linkify   s     $$$ 	=;<<< 	2k''-- ;;s##nn(;]
 
 
 %%h///r   )__name__
__module____qualname____doc__DEFAULT_CALLBACKSURL_REEMAIL_REr   	HTML_TAGSr4   rA    r   r   r   r   ^   sY           $%/8
 8
 8
 8
t0 0 0 0 0r   r   c                   b     e Zd ZdZeddeef fd	Zd Zd Z	d Z
d Zd	 Zd
 Zd Z fdZ xZS )r<   aD  html5lib filter that linkifies text

    This will do the following:

    * convert email addresses into links
    * convert urls into links
    * edit existing links by running them through callbacks--the default is to
      add a ``rel="nofollow"``

    This filter can be used anywhere html5lib filters can be used.

    """

    def __init__(
        self,
        source,
        callbacks=DEFAULT_CALLBACKS,
        skip_tags=None,
        parse_email=False,
        url_re=URL_RE,
        email_re=EMAIL_RE,
    ):
        """Creates a LinkifyFilter instance

        :arg source: stream as an html5lib TreeWalker

        :arg list callbacks: list of callbacks to run when adjusting tag attributes;
            defaults to ``bleach.linkifier.DEFAULT_CALLBACKS``

        :arg set skip_tags: set of tags that you don't want to linkify the
            contents of; for example, you could set this to ``{'pre'}`` to skip
            linkifying contents of ``pre`` tags

        :arg bool parse_email: whether or not to linkify email addresses

        :arg url_re: url matching regex

        :arg email_re: email matching regex

        N)superr4   r   r'   r(   r)   r*   )r2   r7   r   r'   r(   r)   r*   	__class__s          r   r4   zLinkifyFilter.__init__   sM    6 	   "b"b& r   c                 <    | j         D ]} |||          }| dS |S )a  Given an attrs dict and an is_new bool, runs through callbacks

        Callbacks can return an adjusted attrs dict or ``None``. In the case of
        ``None``, we stop going through callbacks and return that and the link
        gets dropped.

        :arg dict attrs: map of ``(namespace, name)`` -> ``value``

        :arg bool is_new: whether or not this link was added by linkify

        :returns: adjusted attrs dict or ``None``

        """
        for cb in self.callbacks:
            attrs = cb(attrs, is_new)
            if attrs is None:
                return None
        return attrs

    def extract_character_data(self, token_list):
        """Extracts and squashes character sequences in a token stream"""
        # Drop the tags and merge the Characters and SpaceCharacters tokens
        # into a single text; this becomes the ``_text`` value handed to the
        # linkify callbacks
        out = []
        for token in token_list:
            token_type = token["type"]
            if token_type in ["Characters", "SpaceCharacters"]:
                out.append(token["data"])

        return "".join(out)

    def handle_email_addresses(self, src_iter):
        """Handle email addresses in character tokens"""
        for token in src_iter:
            if token["type"] == "Characters":
                text = token["data"]
                new_tokens = []
                end = 0

                # For each email address we find in the text
                for match in self.email_re.finditer(text):
                    if match.start() > end:
                        new_tokens.append(
                            {"type": "Characters", "data": text[end : match.start()]}
                        )

                    # URL-encode the local part of the address
                    parts = match.group(0).split("@")
                    parts[0] = quote(parts[0])
                    address = "@".join(parts)

                    # Build up the attributes for the new "a" tag
                    attrs = {
                        (None, "href"): "mailto:%s" % address,
                        "_text": match.group(0),
                    }
                    attrs = self.apply_callbacks(attrs, True)

                    if attrs is None:
                        # The callbacks dropped the link, so add the text as-is
                        new_tokens.append(
                            {"type": "Characters", "data": match.group(0)}
                        )

                    else:
                        # Add an "a" tag for the new link
                        _text = attrs.pop("_text", "")
                        new_tokens.extend(
                            [
                                {"type": "StartTag", "name": "a", "data": attrs},
                                {"type": "Characters", "data": str(_text)},
                                {"type": "EndTag", "name": "a"},
                            ]
                        )
                    end = match.end()

                if new_tokens:
                    # Yield the adjusted set of tokens and then continue
                    # through the loop
                    if end < len(text):
                        new_tokens.append({"type": "Characters", "data": text[end:]})

                    yield from new_tokens

                    continue

            yield token

    def strip_non_url_bits(self, fragment):
        """Strips non-url bits from the url

        This accounts for over-eager matching by the regex.

        """
        prefix = suffix = ""

        while fragment:
            # Try removing ( from the beginning and, if it's balanced, from
            # the end, too
            if fragment.startswith("("):
                prefix = prefix + "("
                fragment = fragment[1:]

                if fragment.endswith(")"):
                    suffix = ")" + suffix
                    fragment = fragment[:-1]
                continue

            # Now try extraneous things from the end--for example a ) that
            # closes a parenthesized phrase rather than being part of the url
            if fragment.endswith(")") and "(" not in fragment:
                fragment = fragment[:-1]
                suffix = ")" + suffix
                continue

            # Handle commas
            if fragment.endswith(","):
                fragment = fragment[:-1]
                suffix = "," + suffix
                continue

            # Handle periods
            if fragment.endswith("."):
                fragment = fragment[:-1]
                suffix = "." + suffix
                continue

            # Nothing matched, so we're done
            break

        return fragment, prefix, suffix

    def handle_links(self, src_iter):
        """Handle links in character tokens"""
        in_a = False  # happens if parse_email=True and a mail link was added
        for token in src_iter:
            if in_a:
                if token["type"] == "EndTag" and token["name"] == "a":
                    in_a = False
                yield token
                continue
            elif token["type"] == "StartTag" and token["name"] == "a":
                in_a = True
                yield token
                continue
            if token["type"] == "Characters":
                text = token["data"]
                new_tokens = []
                end = 0

                for match in self.url_re.finditer(text):
                    if match.start() > end:
                        new_tokens.append(
                            {"type": "Characters", "data": text[end : match.start()]}
                        )

                    url = match.group(0)
                    prefix = suffix = ""

                    # Sometimes we pick up too much in the url match, so look
                    # for bits we should drop and remove them from the match
                    url, prefix, suffix = self.strip_non_url_bits(url)

                    # If there's no protocol, add one
                    if PROTO_RE.search(url):
                        href = url
                    else:
                        href = "http://%s" % url

                    attrs = {(None, "href"): href, "_text": url}
                    attrs = self.apply_callbacks(attrs, True)

                    if attrs is None:
                        # The callbacks dropped the link, so add the text as-is
                        new_tokens.append(
                            {"type": "Characters", "data": prefix + url + suffix}
                        )

                    else:
                        # Add an "a" tag for the new link
                        if prefix:
                            new_tokens.append({"type": "Characters", "data": prefix})

                        _text = attrs.pop("_text", "")
                        new_tokens.extend(
                            [
                                {"type": "StartTag", "name": "a", "data": attrs},
                                {"type": "Characters", "data": str(_text)},
                                {"type": "EndTag", "name": "a"},
                            ]
                        )

                        if suffix:
                            new_tokens.append({"type": "Characters", "data": suffix})

                    end = match.end()

                if new_tokens:
                    # Yield the adjusted set of tokens and then continue
                    # through the loop
                    if end < len(text):
                        new_tokens.append({"type": "Characters", "data": text[end:]})

                    yield from new_tokens

                    continue

            yield token

    def handle_a_tag(self, token_buffer):
        """Handle the "a" tag

        This could adjust the link or drop it altogether depending on what the
        callbacks return.

        This yields the new set of tokens.

        """
        a_token = token_buffer[0]
        if a_token["data"]:
            attrs = a_token["data"]
        else:
            attrs = {}
        text = self.extract_character_data(token_buffer)
        attrs["_text"] = text

        attrs = self.apply_callbacks(attrs, False)

        if attrs is None:
            # We're dropping the "a" tag and everything else, replacing it
            # with character data, so yield that token
            yield {"type": "Characters", "data": text}

        else:
            new_text = attrs.pop("_text", "")
            a_token["data"] = attrs

            if text == new_text:
                # The callbacks didn't change the text, so yield the new "a"
                # token, then whatever else was there, then the end "a" token
                yield a_token
                yield from token_buffer[1:]

            else:
                # The callbacks changed the text, so drop all the tokens
                # between the start and end "a" tags and replace them with
                # the new text
                yield a_token
                yield {"type": "Characters", "data": str(new_text)}
                yield token_buffer[-1]

    def extract_entities(self, token):
        """Handles Characters tokens with entities

        Our overridden tokenizer doesn't do anything with entities. However,
        that means that the serializer will convert all ``&`` in Characters
        tokens to ``&amp;``.

        Since we don't want that, we extract entities here and convert them to
        Entity tokens so the serializer will let them be.

        :arg token: the Characters token to work on

        :returns: generator of tokens

        """
        data = token.get("data", "")

        # If there isn't a & in the data, we can return now
        if "&" not in data:
            yield token
            return

        new_tokens = []

        # For each possible entity that starts with a "&", we try to extract
        # an actual entity and re-tokenize accordingly
        for part in html5lib_shim.next_possible_entity(data):
            if not part:
                continue

            if part.startswith("&"):
                entity = html5lib_shim.match_entity(part)
                if entity is not None:
                    if entity == "amp":
                        # &amp; shows up in querystrings all the time and urls
                        # can't be matched across token boundaries, so special
                        # case it back to a "&" Characters token; it gets
                        # merged with surrounding tokens and escaped again in
                        # the serializer
                        new_tokens.append({"type": "Characters", "data": "&"})
                    else:
                        new_tokens.append({"type": "Entity", "name": entity})

                    # Length of the entity plus 2--one for the & at the
                    # beginning and one for the ; at the end
                    remainder = part[len(entity) + 2 :]
                    if remainder:
                        new_tokens.append({"type": "Characters", "data": remainder})
                    continue

            new_tokens.append({"type": "Characters", "data": part})

        yield from new_tokens

    def __iter__(self):
        in_a = False
        in_skip_tag = None

        token_buffer = []

        for token in super().__iter__():
            if in_a:
                # We're inside an "a" tag--buffer tokens until we hit the
                # end "a" tag
                if token["type"] == "EndTag" and token["name"] == "a":
                    # Add the end tag to the token buffer, handle the whole
                    # buffer, and yield anything returned
                    token_buffer.append(token)
                    yield from self.handle_a_tag(token_buffer)

                    # Clear "a" related state and continue since we've yielded
                    # all the tokens we're going to yield
                    in_a = False
                    token_buffer = []
                else:
                    token_buffer.extend(list(self.extract_entities(token)))
                continue

            if token["type"] in ["StartTag", "EmptyTag"]:
                if token["name"] in self.skip_tags:
                    # Skip tags start a "special mode" where we don't linkify
                    # anything until the end tag
                    in_skip_tag = token["name"]

                elif token["name"] == "a":
                    # The "a" tag is special--switch to slurp mode and buffer
                    # all the tokens until the end "a" tag, then figure out
                    # what to do with them there
                    in_a = True
                    token_buffer.append(token)

                    # We buffer the start tag, so we don't want to yield it yet
                    continue

            elif in_skip_tag and self.skip_tags:
                # This clause comes here since switching in and out of in_a
                # takes precedence
                if token["type"] == "EndTag" and token["name"] == in_skip_tag:
                    in_skip_tag = None

            elif not in_a and not in_skip_tag and token["type"] == "Characters":
                new_stream = iter([token])
                if self.parse_email:
                    new_stream = self.handle_email_addresses(new_stream)

                new_stream = self.handle_links(new_stream)

                for new_token in new_stream:
                    yield from self.extract_entities(new_token)

                # We've already yielded this token, so continue
                continue

            yield token