
    Mh/                    r   d dl mZ d dlZd dlZd dlZd dlmZ d dlmZ d dl	m
Z
 d dlmZ d dlmZ d dlmZ d	d
lmZmZmZmZmZmZmZ  e
e          dMd            Z e
e          dNd            Z e
e          dOd            Z e
e          dMd            Z e
e          dMd            Z e
e          dMd            Z e
e          dMd            Z e
e          dMd            Z  e
e          dMd            Z! e
e          dMd            Z" e
e          dMd            Z# e
e          dMd            Z$ e
e          dMd            Z% e
e          dMd            Z& e
e          dMd            Z' e
e          dMd             Z( e
e          dMd!            Z) e
 e*e                    dPd#            Z+ e
e          dMd$            Z,dQdRd*Z- e
d+          dSd-            Z.dTd/Z/dUd1Z0dVdWd5Z1dXd9Z2dYd:Z3d;ej4        d<fdZd@Z5	 d[d\dLZ6dS )]    )annotationsN)IncrementalDecoder)aliases)	lru_cache)findall)	Generator)MultibyteIncrementalDecoder   )ENCODING_MARKSIANA_SUPPORTED_SIMILARRE_POSSIBLE_ENCODING_INDICATIONUNICODE_RANGES_COMBINEDUNICODE_SECONDARY_RANGE_KEYWORDUTF8_MAXIMAL_ALLOCATIONCOMMON_CJK_CHARACTERS)maxsize	characterstrreturnboolc                    	 t          j        |           }n# t          $ r Y dS w xY wd|v pd|v pd|v pd|v pd|v pd|v pd|v pd	|v S )
NFz
WITH GRAVEz
WITH ACUTEzWITH CEDILLAzWITH DIAERESISzWITH CIRCUMFLEXz
WITH TILDEzWITH MACRONzWITH RING ABOVEunicodedataname
ValueErrorr   descriptions     X/var/www/html/test/jupyter/venv/lib/python3.11/site-packages/charset_normalizer/utils.pyis_accentuatedr      s    &+I66   uu 	# 	,;&	,[(	, {*	, +		,
 ;&	, K'	, +	    
%%c                    t          j        |           }|s| S |                    d          }t          t	          |d         d                    S )N r      )r   decompositionsplitchrint)r   
decomposedcodess      r   remove_accentr*   -   sO    !/	::J !'',,Es58R  !!!    
str | Nonec                f    t          |           }t          j                    D ]\  }}||v r|c S dS )zK
    Retrieve the Unicode range official name from a single character.
    N)ordr   items)r   character_ord
range_name	ord_ranges       r   unicode_ranger3   8   sO    
 YM!8!>!@!@  
II%% & 4r+   c                X    	 t          j        |           }n# t          $ r Y dS w xY wd|v S )NFLATINr   r   s     r   is_latinr6   F   sF    &+I66   uuk!!r    c                d    t          j        |           }d|v rdS t          |           }|dS d|v S )NPTFPunctuationr   categoryr3   r   character_categorycharacter_ranges      r   is_punctuationr?   O   sG    )29==
   t"/	":":OuO++r+   c                x    t          j        |           }d|v sd|v rdS t          |           }|dS d|v o|dk    S )NSNTFFormsLor:   r<   s      r   	is_symbolrE   ^   s]    )29==
   C+=$=$=t"/	":":Ouo%D*<*DDr+   c                8    t          |           }|dS d|v pd|v S )NF	EmoticonsPictographs)r3   )r   r>   s     r   is_emoticonrI   m   s/    "/	":":Ou/)M]o-MMr+   c                n    |                                  s| dv rdS t          j        |           }d|v p|dv S )N>      ｜+<>TZ>   PcPdPo)isspacer   r;   )r   r=   s     r   is_separatorrT   w   sO     i+AAAt)29==$$P(:>P(PPr+   c                V    |                                  |                                 k    S N)islowerisupperr   s    r   is_case_variablerZ      s%    )"3"3"5"555r+   c                X    	 t          j        |           }n# t          $ r Y dS w xY wd|v S )NFCJKr   r   character_names     r   is_cjkr_      sH    $))44   uu N""r    c                X    	 t          j        |           }n# t          $ r Y dS w xY wd|v S )NFHIRAGANAr   r]   s     r   is_hiraganarb      H    $))44   uu ''r    c                X    	 t          j        |           }n# t          $ r Y dS w xY wd|v S )NFKATAKANAr   r]   s     r   is_katakanarf      rc   r    c                X    	 t          j        |           }n# t          $ r Y dS w xY wd|v S )NFHANGULr   r]   s     r   	is_hangulri      H    $))44   uu ~%%r    c                X    	 t          j        |           }n# t          $ r Y dS w xY wd|v S )NFTHAIr   r]   s     r   is_thairm      sH    $))44   uu ^##r    c                X    	 t          j        |           }n# t          $ r Y dS w xY wd|v S )NFARABICr   r]   s     r   	is_arabicrp      rj   r    c                `    	 t          j        |           }n# t          $ r Y dS w xY wd|v od|v S )NFro   zISOLATED FORMr   r]   s     r   is_arabic_isolated_formrr      sR    $))44   uu ~%K/^*KKr    c                    | t           vS rV   )r   rY   s    r   is_cjk_uncommonrt      s    111r+   r1   c                D     t           fdt          D                       S )Nc              3      K   | ]}|v V  	d S rV    ).0keywordr1   s     r   	<genexpr>z-is_unicode_range_secondary.<locals>.<genexpr>   s(      TTw*$TTTTTTr+   )anyr   )r1   s   `r   is_unicode_range_secondaryr|      s'    TTTT4STTTTTTr+   c                r    |                                  du o!|                                 du o| dk    o| dk    S )NFu   ﻿)rS   isprintablerY   s    r   is_unprintabler      sR     	u$ 	"!!##u,	"	" !	r+       sequencebytessearch_zoner'   c           	        t          | t                    st          t          |           }t	          t
          | dt          ||                                       dd                    }t          |          dk    rdS |D ][}|                                	                    dd          }t          j                    D ]\  }}||k    r|c c S ||k    r|c c S \dS )zW
    Extract using ASCII-only decoder any specified encoding in the first n-bytes.
    Nasciiignoreerrorsr   -_)
isinstancer   	TypeErrorlenr   r   mindecodelowerreplacer   r/   )r   r   seq_lenresultsspecified_encodingencoding_aliasencoding_ianas          r   any_specified_encodingr      s    h&& x==G ',3w,,,-44WX4NN G
 7||qt% 
% 
%/5577??SII
 .5]__ 	% 	%)NM!333$$$$$$ 222$$$$$$ 3	% 4r+      r   c                h    | dv p.t          t          j        d|            j        t                    S )zQ
    Verify is a specific encoding is a multi byte one based on it IANA name
    >	   utf_7utf_8utf_16utf_32	utf_16_be	utf_16_le	utf_32_be	utf_32_le	utf_8_sig
encodings.)
issubclass	importlibimport_moduler   r	   )r   s    r   is_multi_byte_encodingr     sG    
  
 
  
 3T 3 344G#
 
r+   tuple[str | None, bytes]c                    t           D ]I}t           |         }t          |t                    r|g}|D ]}|                     |          r||fc c S  JdS )z9
    Identify and extract SIG/BOM in given sequence.
    )Nr+   )r   r   r   
startswith)r   iana_encodingmarksmarks       r   identify_sig_or_bomr     s    
 ( + +%3M%BeU## 	GE 	+ 	+D""4(( +$d******+	+ 9r+   r   c                
    | dvS )N>   r   r   rw   )r   s    r   should_strip_sig_or_bomr   (  s     444r+   Tcp_namestrictc                    |                                                      dd          } t          j                    D ]\  }}| ||fv r|c S |rt	          d|  d          | S )zIReturns the Python normalized encoding name (Not the IANA official name).r   r   zUnable to retrieve IANA for '')r   r   r   r/   r   )r   r   r   r   s       r   	iana_namer   ,  s    mmoo%%c3//G
 *1 ! !%~}555     6  ECCCCDDDNr+   iana_name_aiana_name_bfloatc                   t          |           st          |          rdS t          j        d|            j        }t          j        d|           j        } |d          } |d          }d}t	          d          D ]C}t          |g          }|                    |          |                    |          k    r|dz  }D|dz  S )	Ng        r   r   r   r      r
      )r   r   r   r   ranger   r   )	r   r   	decoder_a	decoder_bid_aid_bcharacter_match_countito_be_decodeds	            r   cp_similarityr   =  s    k** .D[.Q.Q s'(B[(B(BCCVI'(B[(B(BCCVI(y999D(y999D!"3ZZ ' '$aSzz;;}%%])C)CCC!Q&! 3&&r+   c                2    | t           v o|t           |          v S )z
    Determine if two code page are at least 80% similar. IANA_SUPPORTED_SIMILAR dict was generated using
    the function cp_similarity.
    )r   )r   r   s     r   is_cp_similarr   Q  s%     	-- 	?1+>>r+   charset_normalizerz)%(asctime)s | %(levelname)s | %(message)slevelformat_stringNonec                    t          j        |           }|                    |           t          j                    }|                    t          j        |                     |                    |           d S rV   )logging	getLoggersetLevelStreamHandlersetFormatter	Formatter
addHandler)r   r   r   loggerhandlers        r   set_logging_handlerr   \  sm    
 t$$F
OOE#%%G*=99:::
gr+   	sequencesr   offsetsr   
chunk_sizebom_or_sig_availablestrip_sig_or_bomsig_payloadis_multi_byte_decoderdecoded_payloadGenerator[str, None, None]c	              #    K   |r!|du r|D ]}	||	|	|z            }
|
s d S |
V  d S |D ]}	|	|z   }|t          |           dz   k    r| |	|	|z            }|r	|du r||z   }|                    ||rdnd          }
|ru|	dk    rot          |d          }|r]|
d |         |vrQt          |	|	dz
  d	          D ]<}| ||         }|r	|du r||z   }|                    |d          }
|
d |         |v r n=|
V  d S )
NF   r   r   r   r   r#      )r   r   r   r   )r   r   r   r   r   r   r   r   r   r   chunk	chunk_endcut_sequencechunk_partial_size_chkjs                  r   cut_sequence_chunksr   i  s       *0E99 	 	A#AJ$67E KKKK		 	  #	 #	AJI3y>>A---$QZ%78L# :(8E(A(A*\9 ''#8Fxxh (  E % "Q.1*b.A.A& $"5556oMM"1a!eR00 	" 	"'09'=/ F4D4M4M+6+EL , 3 3M( 3 S S !8"8!89_LL!E M KKKKG#	 #	r+   )r   r   r   r   )r   r   r   r   )r   r   r   r,   )r1   r   r   r   )r   )r   r   r   r'   r   r,   )r   r   r   r   )r   r   r   r   )r   r   r   r   )T)r   r   r   r   r   r   )r   r   r   r   r   r   )r   r   r   r   r   r   )r   r   r   r'   r   r   r   r   rV   )r   r   r   r   r   r   r   r'   r   r   r   r   r   r   r   r   r   r,   r   r   )7
__future__r   r   r   r   codecsr   encodings.aliasesr   	functoolsr   rer   typingr   _multibytecodecr	   constantr   r   r   r   r   r   r   r   r*   r3   r6   r?   rE   rI   rT   rZ   r_   rb   rf   ri   rm   rp   rr   rt   r   r|   r   r   r   r   r   r   r   r   INFOr   r   rw   r+   r   <module>r      s   " " " " " "          % % % % % % % % % % % %                                         *+++   ,+" *+++" " " ,+" *+++
 
 
 ,+
 *+++" " " ,+" *+++, , , ,+, *+++E E E ,+E *+++N N N ,+N *+++Q Q Q ,+Q *+++6 6 6 ,+6 *+++# # # ,+# *+++( ( ( ,+( *+++( ( ( ,+( *+++& & & ,+& *+++$ $ $ ,+$ *+++& & & ,+& *+++L L L ,+L *+++2 2 2 ,+2 33.//000U U U 10U *+++   ,+    @ 3   (   $5 5 5 5    "' ' ' '(    %D
 
 
 
 
, #'5 5 5 5 5 5 5r+   