
    kYwF                        d Z ddlZddlZ	 ddlZn# e$ r ddlZY nw xY wddlmZ dgZ ej	        d          Z
 ej	        d          Z ej	        d          Z ej	        d	          Z ej	        d
          Z ej	        d          Z ej	        d          Z ej	        d          Z ej	        d          Z ej	        dej                  Z ej	        d          Z ej	        d          Z G d dej                  ZdS )zA parser for HTML and XHTML.    N   unescape
HTMLParserz[&<]z
&[a-zA-Z#]z%&([a-zA-Z][-.a-zA-Z0-9]*)[^a-zA-Z0-9]z)&#(?:[0-9]+|[xX][0-9a-fA-F]+)[^0-9a-fA-F]z	<[a-zA-Z]>z--\s*>z+([a-zA-Z][^\t\n\r\f />\x00]*)(?:\s|/(?!>))*z]((?<=[\'"\s/])[^\s/>][^\s/=>]*)(\s*=+\s*(\'[^\']*\'|"[^"]*"|(?![\'"])[^>\s]*))?(?:\s|/(?!>))*aF  
  <[a-zA-Z][^\t\n\r\f />\x00]*       # tag name
  (?:[\s/]*                          # optional whitespace before attribute name
    (?:(?<=['"\s/])[^\s/>][^\s/=>]*  # attribute name
      (?:\s*=+\s*                    # value indicator
        (?:'[^']*'                   # LITA-enclosed value
          |"[^"]*"                   # LIT-enclosed value
          |(?!['"])[^>\s]*           # bare value
         )
         (?:\s*,)*                   # possibly followed by a comma
       )?(?:\s|/(?!>))*
     )*
   )?
  \s*                                # trailing whitespace
z#</\s*([a-zA-Z][-.a-zA-Z0-9:_]*)\s*>c                       e Zd ZdZdZddZd Zd Zd ZdZ	d	 Z
d
 Zd Zd Zd Zd dZd Zd Zd Zd Zd Zd Zd Zd Zd Zd Zd Zd Zd Zd Zd ZdS )!r   aE  Find tags and other markup and call handler functions.

    Usage:
        p = HTMLParser()
        p.feed(data)
        ...
        p.close()

    Start tags are handled by calling self.handle_starttag() or
    self.handle_startendtag(); end tags by self.handle_endtag().  The
    data between tags is passed from the parser to the derived class
    by calling self.handle_data() with the data as argument (the data
    may be split up in arbitrary chunks).  If convert_charrefs is
    True the character references are converted automatically to the
    corresponding Unicode character (and self.handle_data() is no
    longer split in chunks), otherwise they are passed by calling
    self.handle_entityref() or self.handle_charref() with the string
    containing respectively the named or numeric reference as the
    argument.
    )scriptstyleTc                 <    || _         |                                  dS )zInitialize and reset this instance.

        If convert_charrefs is True (the default), all character references
        are automatically converted to the corresponding Unicode characters.
        N)convert_charrefsreset)selfr   s     Z/var/www/html/test/jupyter/venv/lib/python3.11/site-packages/htmlmin/python3html/parser.py__init__zHTMLParser.__init__e   s     !1

    c                     d| _         d| _        t          | _        d| _        t
          j                            |            dS )z1Reset this instance.  Loses all unprocessed data. z???N)rawdatalasttaginteresting_normalinteresting
cdata_elem
markupbase
ParserBaser   r   s    r   r   zHTMLParser.resetn   s<    -##D)))))r   c                 N    | j         |z   | _         |                     d           dS )zFeed data to the parser.

        Call this as often as you want, with as little or as much text
        as you want (may include '\n').
        r   N)r   goaheadr   datas     r   feedzHTMLParser.feedv   s%     |d*Qr   c                 0    |                      d           dS )zHandle any buffered data.r   N)r   r   s    r   closezHTMLParser.close   s    Qr   Nc                     | j         S )z)Return full source of start tag: '<...>'.)_HTMLParser__starttag_textr   s    r   get_starttag_textzHTMLParser.get_starttag_text   s    ##r   c                     |                                 | _        t          j        d| j        z  t          j                  | _        d S )Nz</\s*%s\s*>)lowerr   recompileIr   )r   elems     r   set_cdata_modezHTMLParser.set_cdata_mode   s4    **,,:nt&FMMr   c                 ,    t           | _        d | _        d S N)r   r   r   r   s    r   clear_cdata_modezHTMLParser.clear_cdata_mode   s    -r   c                    | j         }d}t          |          }||k     r/| j        r}| j        sv|                    d|          }|dk     rY|                    dt          ||dz
                      }|dk    r*t          j        d          	                    ||          sn|}n=| j
        	                    ||          }|r|                                }n| j        rnp|}||k     r\| j        r8| j        s1|                     |                     |||                              n|                     |||                    |                     ||          }||k    rn|j        } |d|          rt                               ||          r|                     |          }	n |d|          r|                     |          }	n |d|          r|                     |          }	nj |d|          r|                     |          }	nH |d	|          r|                     |          }	n&|d
z   |k     r|                     d           |d
z   }	nn|	dk     r|sn|                    d|d
z             }	|	dk     r%|                    d|d
z             }	|	dk     r|d
z   }	n|	d
z  }	| j        r8| j        s1|                     |                     |||	                              n|                     |||	                    |                     ||	          }n- |d|          rt.                              ||          }|rq|                                dd         }
|                     |
           |                                }	 |d|	d
z
            s|	d
z
  }	|                     ||	          }d||d          v r9|                     |||dz                       |                     ||dz             }nS |d|          r5t6                              ||          }|rj|                    d
          }
|                     |
           |                                }	 |d|	d
z
            s|	d
z
  }	|                     ||	          }wt:                              ||          }|rX|rU|                                ||d          k    r5|                                }	|	|k    r|}	|                     ||d
z             }nJ|d
z   |k     r/|                     d           |                     ||d
z             }nnJ d            ||k     /|r||k     ry| j        sr| j        r8| j        s1|                     |                     |||                              n|                     |||                    |                     ||          }||d          | _         d S )Nr   <&"   z[\s;]</<!--<?<!r   r   z&#   ;zinteresting.search() lied)r   lenr   r   findrfindmaxr(   r)   searchr   starthandle_datar   	updatepos
startswithstarttagopenmatchparse_starttagparse_endtagparse_commentparse_piparse_html_declarationcharrefgrouphandle_charrefend	entityrefhandle_entityref
incomplete)r   rN   r   injampposrE   rC   knames              r   r   zHTMLParser.goahead   sD   ,LL!ee$ T_ LLa((q55 %]]3Aqt==F!Jx0077HH $A(//;; AA A1uu( 3 3$$T]]71Q3<%@%@AAAA$$WQqS\222q!$$AAvvu +Jz#q!! J6%%gq11 ++A..AAZa(( ))!,,AAZ** 
**1--AAZa(( a((AAZa(( 33A66AA!eq[[$$S)))AAAq55 S!a%00A1uu#LLa!e44q55 !AAQ, 7T_ 7((wqs|)D)DEEEE((1666NN1a((D!$$ +6gq11  ;;==2.D''---		A%:c1Q3// "Eq!,,Agabbk))((1Q3888 NN1ac22C## 6!33  ;;q>>D))$///		A%:c1Q3// "Eq!,,A"((!44  5u{{}};;!IIKK66 !A NN1a!e44!eq[[ $$S)))q!a%00AA55555S !eeV  	%1q555$ /T_ /  wqs|!<!<====  1...q!$$Aqrr{r   c                    | j         }|||dz            dk    s
J d            |||dz            dk    r|                     |          S |||dz            dk    r|                     |          S |||dz                                            d	k    rF|                    d
|dz             }|dk    rdS |                     ||dz   |                    |dz   S |                     |          S )Nr8   r7   z+unexpected call to parse_html_declaration()   r5      z<![	   z	<!doctyper   r9   r   )r   rH   parse_marked_sectionr'   r<   handle_declparse_bogus_comment)r   rR   r   gtposs       r   rJ   z!HTMLParser.parse_html_declaration  s   ,q1u~%%% )C%%%1QqS5>V##%%a(((QqsU^u$$,,Q///QqsU^!!##{22LLac**E{{rWQqSY/0007N++A...r   r   c                     | j         }|||dz            dv s
J d            |                    d|dz             }|dk    rdS |r |                     ||dz   |                    |dz   S )Nr8   )r7   r4   z"unexpected call to parse_comment()r   r9   r   )r   r<   handle_comment)r   rR   reportr   poss        r   r^   zHTMLParser.parse_bogus_comment#  s    ,q1u~--- 1B---ll3!$$"992 	2!C 0111Qwr   c                    | j         }|||dz            dk    s
J d            t                              ||dz             }|sdS |                                }|                     ||dz   |                    |                                }|S )Nr8   r6   zunexpected call to parse_pi()r9   )r   picloser?   r@   	handle_pirN   )r   rR   r   rE   rT   s        r   rI   zHTMLParser.parse_pi/  s    ,q1u~%%%'F%%%w!,, 	2KKMMwqsAv'''IIKKr   c                 j   d | _         |                     |          }|dk     r|S | j        }|||         | _         g }t                              ||dz             }|s
J d            |                                }|                    d                                          x| _        }||k     rt                              ||          }|sn|                    ddd          \  }	}
}|
sd }nI|d d         dcxk    r|dd          k    s"n |d d         dcxk    r|dd          k    rn n
|dd         }|r| 
                    |          }|                    |	                                |f           |                                }||k     |||                                         }|d	vr|                                 \  }}d
| j         v rM|| j                             d
          z   }t          | j                   | j                             d
          z
  }n|t          | j                   z   }|                     |||                    |S |                    d          r|                     ||           n4|                     ||           || j        v r|                     |           |S )Nr   r   z#unexpected call to parse_starttag()r8   rZ   'r9   ")r   />
rj   )r$   check_for_whole_start_tagr   tagfind_tolerantrE   rN   rL   r'   r   attrfind_tolerantr   appendstripgetposcountr;   r=   rA   endswithhandle_startendtaghandle_starttagCDATA_CONTENT_ELEMENTSr,   )r   rR   endposr   attrsrE   rV   tagmattrnamerest	attrvaluerN   linenooffsets                  r   rF   zHTMLParser.parse_starttag;  s   #//22A::M,&qx0  &&w!44;;;;;;IIKK"[[^^11333s&jj!''33A ()1a(8(8%HdI , 		2A2$8888)BCC.88882A2#777723377777%adO	 5 MM)44	LL(..**I6777A &jj ah%%''k!!![[]]NFFt+++$"6"<"<T"B"BBT122/55d;;<  #d&:";";;WQvX.///M<< 	)##C////  e,,,d111##C(((r   c                    | j         }t                              ||          }|r|                                }|||dz            }|dk    r|dz   S |dk    r@|                    d|          r|dz   S |                    d|          rdS ||k    r|S |dz   S |dk    rdS |dv rdS ||k    r|S |dz   S t          d	          )
Nr   r   /rj   r8   r9   r   z6abcdefghijklmnopqrstuvwxyz=/ABCDEFGHIJKLMNOPQRSTUVWXYZzwe should not get here!)r   locatestarttagend_tolerantrE   rN   rC   AssertionError)r   rR   r   rz   rT   nexts         r   rl   z$HTMLParser.check_for_whole_start_tagn  s   ,&,,Wa88 	A1QqS5>Ds{{1us{{%%dA.. !q5L%%c1-- 2q55Hq5Lrzzr 5 6 6 r1uu1u6777r   c                    | j         }|||dz            dk    s
J d            t                              ||dz             }|sdS |                                }t                              ||          }|s| j        |                     |||                    |S t                              ||dz             }|s+|||dz            dk    r|dz   S | 	                    |          S |
                    d                                          }|                    d|                                          }|                     |           |dz   S |
                    d                                          }| j        *|| j        k    r|                     |||                    |S |                     |                                           |                                  |S )	Nr8   r4   zunexpected call to parse_endtagr   r9   rZ   z</>r   )r   	endendtagr?   rN   
endtagfindrE   r   rA   rm   r^   rL   r'   r<   handle_endtagr/   )r   rR   r   rE   r_   	namematchtagnamer+   s           r   rG   zHTMLParser.parse_endtag  s   ,q1u~%%%'H%%%  !A#.. 	2		  !,, 	*  5!1222(..w!<<I 71QqS5>U**Q3J33A666ooa((..00G
 LLimmoo66Ew'''7N{{1~~##%%?&t&&  5!12224::<<(((r   c                 \    |                      ||           |                     |           d S r.   )ru   r   r   ry   rx   s      r   rt   zHTMLParser.handle_startendtag  s2    S%(((3r   c                     d S r.    r   s      r   ru   zHTMLParser.handle_starttag      r   c                     d S r.   r   )r   ry   s     r   r   zHTMLParser.handle_endtag  r   r   c                     d S r.   r   r   rW   s     r   rM   zHTMLParser.handle_charref  r   r   c                     d S r.   r   r   s     r   rP   zHTMLParser.handle_entityref  r   r   c                     d S r.   r   r   s     r   rA   zHTMLParser.handle_data  r   r   c                     d S r.   r   r   s     r   ra   zHTMLParser.handle_comment  r   r   c                     d S r.   r   )r   decls     r   r]   zHTMLParser.handle_decl  r   r   c                     d S r.   r   r   s     r   rf   zHTMLParser.handle_pi  r   r   c                     d S r.   r   r   s     r   unknown_declzHTMLParser.unknown_decl  r   r   c                      t          |          S r.   r   )r   ss     r   r   zHTMLParser.unescape  s    {{r   )T)r   )__name__
__module____qualname____doc__rv   r   r   r    r"   r$   r%   r,   r/   r   rJ   r^   rI   rF   rl   rG   rt   ru   r   rM   rP   rA   ra   r]   rf   r   r   r   r   r   r   r   M   s        * 1   * * *     O$ $ $N N N  u# u# u#t/ / /*	 	 	 		 	 	/ / /f8 8 8D% % %P     
                      r   )r   r(   warnings_markupbaser   ImportErrorr   r   __all__r)   r   rQ   rO   rK   rD   re   commentcloserm   rn   VERBOSEr   r   r   r   r   r   r   r   <module>r      s   " "* 
			 $$$$$          .  RZ'' RZ%%
BJ>??	
"*@
A
Arz+&&
"*S//rz)$$ 2:LMM BJ=> >  (RZ ) Z   BJsOO	 RZ>??
T T T T T& T T T T Ts    	