
    Mhj0                        d dl mZ d dlmZ d dlmZ d dlmZ d dlm	Z	 d dl
mZmZmZmZ ddlmZmZ dd	lmZmZmZ  G d
 d          Z G d d          Zeeef         Zee         Z G d d          ZdS )    )annotations)aliases)sha256)dumps)sub)AnyIteratorListTuple   )RE_POSSIBLE_ENCODING_INDICATIONTOO_BIG_SEQUENCE)	iana_nameis_multi_byte_encodingunicode_rangec                     e Zd Z	 	 d/d0dZd1dZd1dZed2d            Zd3dZd3dZ	d4dZ
ed3d            Zed5d            Zed6d            Zed6d            Zed5d            Zed3d             Zed2d!            Zed2d"            Zed2d#            Zed2d$            Zed7d%            Zed8d'            Zed6d(            Zed5d)            Zed5d*            Zd9d:d-Zed3d.            ZdS );CharsetMatchNpayloadbytesguessed_encodingstrmean_mess_ratiofloathas_sig_or_bombool	languagesCoherenceMatchesdecoded_payload
str | Nonepreemptive_declarationc                    || _         || _        || _        || _        || _        d | _        g | _        d| _        d | _        d | _	        || _
        || _        d S )N        )_payload	_encoding_mean_mess_ratio
_languages_has_sig_or_bom_unicode_ranges_leaves_mean_coherence_ratio_output_payload_output_encoding_string_preemptive_declaration)selfr   r   r   r   r   r   r    s           Y/var/www/html/test/jupyter/venv/lib/python3.11/site-packages/charset_normalizer/models.py__init__zCharsetMatch.__init__   se      '.'6,5%315+-,/"-1,0#23I$$$    otherobjectreturnc                    t          |t                    s/t          |t                    rt          |          | j        k    S dS | j        |j        k    o| j        |j        k    S )NF)
isinstancer   r   r   encodingfingerprintr/   r3   s     r0   __eq__zCharsetMatch.__eq__*   s_    %.. 	%%% 9 ''4=885}.X43CuGX3XXr2   c                   t          |t                    st          t          | j        |j        z
            }t          | j        |j        z
            }|dk     r|dk    r| j        |j        k    S |dk     rC|dk    r=t          | j                  t          k    r| j        |j        k     S | j	        |j	        k    S | j        |j        k     S )zQ
        Implemented to make sorted available upon CharsetMatches items.
        g{Gz?g{Gz?)
r7   r   
ValueErrorabschaos	coherencelenr#   r   multi_byte_usage)r/   r3   chaos_differencecoherence_differences       r0   __lt__zCharsetMatch.__lt__1   s     %.. 	"%dj5;&>"?"?&)$.5?*J&K&K d""';d'B'B>EO33$$)=)E)E 4=!!%555zEK//(5+AAAzEK''r2   c                j    dt          t          |                     t          | j                  z  z
  S )Ng      ?)rA   r   rawr/   s    r0   rB   zCharsetMatch.multi_byte_usageG   s&    c#d))nns48}}455r2   c                ^    | j          t          | j        | j        d          | _         | j         S )Nstrict)r-   r   r#   r$   rH   s    r0   __str__zCharsetMatch.__str__K   s)    <t}dnhGGDL|r2   c                (    d| j          d| j         dS )Nz<CharsetMatch 'z' bytes(z)>)r8   r9   rH   s    r0   __repr__zCharsetMatch.__repr__Q   s     LLL8HLLLLr2   Nonec                    t          |t                    r|| k    r't          d                    |j                            d |_        | j                            |           d S )Nz;Unable to add instance <{}> as a submatch of a CharsetMatch)r7   r   r=   format	__class__r-   r)   appendr:   s     r0   add_submatchzCharsetMatch.add_submatchT   sk    %.. 	%4--MTTO    E"""""r2   c                    | j         S N)r$   rH   s    r0   r8   zCharsetMatch.encoding_   s
    ~r2   	list[str]c                    g }t          j                    D ]F\  }}| j        |k    r|                    |           &| j        |k    r|                    |           G|S )z
        Encoding name are known by many name, using this could help when searching for IBM855 when it's listed as CP855.
        )r   itemsr8   rR   )r/   also_known_asups       r0   encoding_aliaseszCharsetMatch.encoding_aliasesc   sn    
 $&MOO 	( 	(DAq}!!$$Q''''!##$$Q'''r2   c                    | j         S rU   r'   rH   s    r0   bomzCharsetMatch.bomp       ##r2   c                    | j         S rU   r^   rH   s    r0   byte_order_markzCharsetMatch.byte_order_markt   r`   r2   c                $    d | j         D             S )z
        Return the complete list of possible languages found in decoded sequence.
        Usually not really useful. Returned list may be empty even if 'language' property return something != 'Unknown'.
        c                    g | ]
}|d          S )r    ).0es     r0   
<listcomp>z*CharsetMatch.languages.<locals>.<listcomp>~   s    ...!...r2   r&   rH   s    r0   r   zCharsetMatch.languagesx   s     /.do....r2   c                   | j         shd| j        v rdS ddlm}m} t          | j                  r || j                  n || j                  }t          |          dk    sd|v rdS |d         S | j         d         d         S )z
        Most probable language found in decoded sequence. If none were detected or inferred, the property will return
        "Unknown".
        asciiEnglishr   )encoding_languagesmb_encoding_languageszLatin BasedUnknown)r&   could_be_from_charsetcharset_normalizer.cdrm   rn   r   r8   rA   )r/   rm   rn   r   s       r0   languagezCharsetMatch.language   s      	  $444 y XWWWWWWW *$-887%%dm444''66  9~~""my&@&@ yQ<q!!$$r2   c                    | j         S rU   )r%   rH   s    r0   r?   zCharsetMatch.chaos   s    $$r2   c                :    | j         sdS | j         d         d         S )Nr"   r   r   ri   rH   s    r0   r@   zCharsetMatch.coherence   s#     	3q!!$$r2   c                4    t          | j        dz  d          S Nd      )ndigits)roundr?   rH   s    r0   percent_chaoszCharsetMatch.percent_chaos   s    TZ#%q1111r2   c                4    t          | j        dz  d          S rv   )rz   r@   rH   s    r0   percent_coherencezCharsetMatch.percent_coherence   s    T^c)15555r2   c                    | j         S )z+
        Original untouched bytes.
        )r#   rH   s    r0   rG   zCharsetMatch.raw   s    
 }r2   list[CharsetMatch]c                    | j         S rU   )r)   rH   s    r0   submatchzCharsetMatch.submatch   s
    |r2   c                2    t          | j                  dk    S Nr   )rA   r)   rH   s    r0   has_submatchzCharsetMatch.has_submatch   s    4<  1$$r2   c                    | j         | j         S d t          |           D             }t          t          d |D                                 | _         | j         S )Nc                ,    g | ]}t          |          S re   )r   )rf   chars     r0   rh   z*CharsetMatch.alphabets.<locals>.<listcomp>   s     ,W,W,WT]4-@-@,W,W,Wr2   c                    h | ]}||S re   re   )rf   rs     r0   	<setcomp>z)CharsetMatch.alphabets.<locals>.<setcomp>   s    +L+L+L!!+LA+L+L+Lr2   )r(   r   sortedlist)r/   detected_rangess     r0   	alphabetszCharsetMatch.alphabets   s^    +'',W,WSQUYY,W,W,W%d+L+L+L+L+L&M&MNN##r2   c                6    | j         gd | j        D             z   S )z
        The complete list of encoding that output the exact SAME str result and therefore could be the originating
        encoding.
        This list does include the encoding available in property 'encoding'.
        c                    g | ]	}|j         
S re   )r8   )rf   ms     r0   rh   z6CharsetMatch.could_be_from_charset.<locals>.<listcomp>   s    "D"D"D!1:"D"D"Dr2   )r$   r)   rH   s    r0   rp   z"CharsetMatch.could_be_from_charset   s%     "D"Dt|"D"D"DDDr2   utf_8r8   c                <     j          j         |k    r| _         t                     } j        K j                                        dvr0t	          t
           fd|dd         d          }||dd         z   }|                    |d           _         j        S )z
        Method to get re-encoded bytes payload using given target encoding. Default to UTF-8.
        Any errors will be simply ignored by the encoder NOT replaced.
        N)zutf-8utf8r   c                &   | j         |                                 d         |                                 d                                      |                                 d         t	          j                                      dd                    S )Nr   r   _-)stringspanreplacegroupsr   r,   )r   r/   s    r0   <lambda>z%CharsetMatch.output.<locals>.<lambda>   sh    ahqvvxx{QVVXXa['@AII

1!$"788@@cJJ  r2   i    r   )countr   )r,   r   r.   lowerr   r   encoder+   )r/   r8   decoded_stringpatched_headers   `   r0   outputzCharsetMatch.output   s    
  (D,AX,M,M$,D! YYN,80668812 2 "%3    #5D5)" " " "0.2G!G#1#8#89#M#MD ##r2   c                h    t          |                                                                           S )zw
        Retrieve the unique SHA256 computed using the transformed (re-encoded) payload. Not the original one.
        )r   r   	hexdigestrH   s    r0   r9   zCharsetMatch.fingerprint   s&    
 dkkmm$$..000r2   )NN)r   r   r   r   r   r   r   r   r   r   r   r   r    r   )r3   r4   r5   r   )r5   r   r5   r   )r3   r   r5   rN   )r5   rV   r5   r   )r5   r   )r5   r   )r   )r8   r   r5   r   )__name__
__module____qualname__r1   r;   rE   propertyrB   rK   rM   rS   r8   r\   r_   rb   r   rr   r?   r@   r{   r}   rG   r   r   r   rp   r   r9   re   r2   r0   r   r      s        '+-1J J J J J8Y Y Y Y( ( ( (, 6 6 6 X6   M M M M	# 	# 	# 	#    X 
 
 
 X
 $ $ $ X$ $ $ $ X$ / / / X/ % % % X%6 % % % X% % % % X%
 2 2 2 X2 6 6 6 X6    X    X % % % X% $ $ $ X$ E E E XE$ $ $ $ $: 1 1 1 X1 1 1r2   r   c                  T    e Zd ZdZdddZddZddZddZddZddZ	ddZ
ddZdS )CharsetMatchesz
    Container with every CharsetMatch items ordered by default from most probable to the less one.
    Act like a list(iterable) but does not implements all related methods.
    Nresultslist[CharsetMatch] | Nonec                6    |rt          |          ng | _        d S rU   )r   _results)r/   r   s     r0   r1   zCharsetMatches.__init__   s    ?F,NF7OOOBr2   r5   Iterator[CharsetMatch]c              #  $   K   | j         E d {V  d S rU   r   rH   s    r0   __iter__zCharsetMatches.__iter__   s&      =         r2   item	int | strr   c                    t          |t                    r| j        |         S t          |t                    r't	          |d          }| j        D ]}||j        v r|c S t          )z
        Retrieve a single item either by its position or encoding name (alias may be used here).
        Raise KeyError upon invalid index or encoding not present in results.
        F)r7   intr   r   r   rp   KeyError)r/   r   results      r0   __getitem__zCharsetMatches.__getitem__  sv    
 dC   	'=&&dC   	"T5))D- " "6777!MMM 8r2   r   c                *    t          | j                  S rU   rA   r   rH   s    r0   __len__zCharsetMatches.__len__  s    4=!!!r2   r   c                2    t          | j                  dk    S r   r   rH   s    r0   __bool__zCharsetMatches.__bool__  s    4=!!A%%r2   rN   c                   t          |t                    s4t          d                    t	          |j                                      t          |j                  t          k     rB| j	        D ]:}|j
        |j
        k    r(|j        |j        k    r|                    |            dS ;| j	                            |           t          | j	                  | _	        dS )z~
        Insert a single match. Will be inserted accordingly to preserve sort.
        Can be inserted as a submatch.
        z-Cannot append instance '{}' to CharsetMatchesN)r7   r   r=   rP   r   rQ   rA   rG   r   r   r9   r?   rS   rR   r   )r/   r   matchs      r0   rR   zCharsetMatches.append  s    
 $-- 	?FF''    tx==+++  $(888U[DJ=V=V&&t,,,FFT"""t}--r2   CharsetMatch | Nonec                .    | j         sdS | j         d         S )zQ
        Simply return the first match. Strict equivalent to matches[0].
        Nr   r   rH   s    r0   bestzCharsetMatches.best)  s      } 	4}Qr2   c                *    |                                  S )zP
        Redundant method, call the method best(). Kept for BC reasons.
        )r   rH   s    r0   firstzCharsetMatches.first1  s     yy{{r2   rU   )r   r   )r5   r   )r   r   r5   r   )r5   r   r   )r   r   r5   rN   )r5   r   )r   r   r   __doc__r1   r   r   r   r   rR   r   r   re   r2   r0   r   r      s         
O O O O O! ! ! !   " " " "& & & &. . . .(            r2   r   c                  6    e Zd ZddZedd            ZddZdS )CliDetectionResultpathr   r8   r   r\   rV   alternative_encodingsrr   r   r   r   r?   r   r@   unicode_pathis_preferredc                    || _         |
| _        || _        || _        || _        || _        || _        || _        || _        |	| _	        || _
        d S rU   )r   r   r8   r\   r   rr   r   r   r?   r@   r   )r/   r   r8   r\   r   rr   r   r   r?   r@   r   r   s               r0   r1   zCliDetectionResult.__init__=  s\     	(4$,+;0E"%$-$2!
 )".r2   r5   dict[str, Any]c                    | j         | j        | j        | j        | j        | j        | j        | j        | j        | j	        | j
        dS )Nr   r8   r\   r   rr   r   r   r?   r@   r   r   r   rH   s    r0   __dict__zCliDetectionResult.__dict__W  sO     I $ 5%)%?"1Z - -
 
 	
r2   c                0    t          | j        dd          S )NT   )ensure_asciiindent)r   r   rH   s    r0   to_jsonzCliDetectionResult.to_jsong  s    T]a@@@@r2   N)r   r   r8   r   r\   rV   r   rV   rr   r   r   rV   r   r   r?   r   r@   r   r   r   r   r   )r5   r   r   )r   r   r   r1   r   r   r   re   r2   r0   r   r   <  sb        / / / /4 
 
 
 X
A A A A A Ar2   r   N)
__future__r   encodings.aliasesr   hashlibr   jsonr   rer   typingr   r	   r
   r   constantr   r   utilsr   r   r   r   r   r   r   CoherenceMatchr   r   re   r2   r0   <module>r      sr   " " " " " " % % % % % %                   - - - - - - - - - - - - G G G G G G G G C C C C C C C C C Ce1 e1 e1 e1 e1 e1 e1 e1P@ @ @ @ @ @ @ @F sEz"' ,A ,A ,A ,A ,A ,A ,A ,A ,A ,Ar2   