
    cMhw7                    2   d dl mZ d dlmZ d dlmZ d dlZd dlZd dl	m
Z
mZ d dlmZ d dlmZ d dlmZ d d	lmZ d d
lmZmZ d dlmZ d dlmZ d dlmZmZ d dlmZm Z m!Z! erd dl"m#Z#m$Z$m%Z% d dl&m'Z'm(Z(m)Z)m*Z* d dl+m,Z,m-Z-  G d de          Z.ddZ/ddZ0dS )    )annotations)defaultdict)TYPE_CHECKINGN)libparsers)import_optional_dependency)DtypeWarning)find_stack_levelpandas_dtype)concat_compatunion_categoricals)CategoricalDtype)ensure_index_from_sequences)dedup_namesis_potential_multi_index)
ParserBaseParserErroris_index_col)HashableMappingSequence)	ArrayLikeDtypeArgDtypeObjReadCsvBuffer)Index
MultiIndexc                  d     e Zd ZU ded<   ded<   d fd	Zdd
ZddZ	 dddZddZdddZ	 xZ
S ) CParserWrapperbool
low_memoryzparsers.TextReader_readersrcReadCsvBuffer[str]returnNonec                p   t                                          |           || _        |                                }|                    dd          | _        | j        du|d<   | j        |d<   | j        j	        |d<   dD ]}|                    |d            t          |                    dd                     |d<   d|vs|d         t          j        u rd	|d<   |d         d
k    rt          d
           t          j        |fi || _        | j        j        | _        | j        d u }| j        j        d | _        n:|                     | j        j        | j        |          \  | _        | _        | _        }| j        +t1          t3          | j        j                            | _        | j        d d          | _        | j        r |                     | j        | j                  | j        J | j        dk    rBt=                                        | j                  s|                      | j                   tC          | j                  tC                    k    r%fdtE          | j                  D             | _        tC          | j                  tC                    k     r|                      | j                   | #                    | j                   | $                                 | j        | _        | j%        s| j        j&        dk    rWtO          | j                  rCd| _(        | )                    | j        | j                  \  }| _        | _        | j        || _        | j        j        (|s&| j        J d gtC          | j                  z  | _        | j        j&        dk    | _*        d S )Nr"   Fallow_leading_colsusecolson_bad_lines)storage_optionsencoding
memory_mapcompressiondtypedtype_backendnumpypyarrowstringc                *    g | ]\  }}|v s|v |S  r6   ).0inr*   s      b/var/www/html/test/jupyter/venv/lib/python3.11/site-packages/pandas/io/parsers/c_parser_wrapper.py
<listcomp>z+CParserWrapper.__init__.<locals>.<listcomp>   s:        1WW  )5    r   T)+super__init__kwdscopypopr"   	index_colr*   r+   valueensure_dtype_objsgetr   
no_defaultr   r   
TextReaderr#   unnamed_colsnamesheader_extract_multi_indexer_columnsindex_names	col_nameslistrangetable_width
orig_names_evaluate_usecolsusecols_dtypesetissubset_validate_usecols_nameslen	enumerate_validate_parse_dates_presence_set_noconvert_columns_has_complex_date_colleading_colsr   _name_processed_clean_index_names_implicit_index)selfr$   r?   keypassed_namesrL   r*   	__class__s         @r:   r>   zCParserWrapper.__init__<   s   	yy{{((<77
 N%' 	!"
 ,Y  $06^
 	  	 C HHS$)$((7D*A*ABBW$&&$*?3>*Q*Q$+D! I--&y111)#6666 L5 zT)<&DJJ 33#  
  :eDL$<==>>DJ *QQQ-< 	,,T\4?KKG ?...!X--c'll6K6K7 7- ,,WdoFFF 4:W--    !*$* 5 5  
 4:W--,,J   	++DJ777##%%% *) 	B|(A--,3 3- (,$ ++JN	 JN #+'2D$|"*<*'333$(6C0@,A,A#A #|81<r<   c                \    	 | j                                          d S # t          $ r Y d S w xY wN)r#   close
ValueError)r`   s    r:   rf   zCParserWrapper.close   sC    	L      	 	 	DD	s    
++c                    | j         J d t          | j                   D             fd| j        D             }|                     || j                  }|D ]}| j                            |           dS )z
        Set the columns that should not undergo dtype conversions.

        Currently, any column that is involved with date parsing will not
        undergo such conversions.
        Nc                    i | ]\  }}||	S r6   r6   )r7   r8   xs      r:   
<dictcomp>z9CParserWrapper._set_noconvert_columns.<locals>.<dictcomp>   s    BBBtq!aBBBr<   c                     g | ]
}|         S r6   r6   )r7   rj   
names_dicts     r:   r;   z9CParserWrapper._set_noconvert_columns.<locals>.<listcomp>   s    999z!}999r<   )rQ   rX   rI   _set_noconvert_dtype_columnsr#   set_noconvert)r`   col_indicesnoconvert_columnscolrm   s       @r:   rZ   z%CParserWrapper._set_noconvert_columns   s     *** CBy'A'ABBB
9999dj999 ==J
 
 % 	, 	,CL&&s++++	, 	,r<   Nnrows
int | None_tuple[Index | MultiIndex | None, Sequence[Hashable] | MultiIndex, Mapping[Hashable, ArrayLike]]c                   	 | j         r*| j                            |          }t          |          }n| j                            |          }n# t
          $ r | j        rd| _        t          | j        t          | j        | j
                            }|                     || j                  \  }}|                     | j                  | j        |                               fd|                                D             }||fcY S |                                   w xY wd| _        | j        }| j        j        r| j        rt-          d          g }| j
        rT| j        j        t/          | j
                  k    r2t1          dt/          | j
                   d| j        j         d          t3          | j        j                  D ]l}| j
        |                    |          }	n |                    | j
        |                   }	|                     |	|d	          }	|                    |	           mt;          |          }| j        |                     |          }t          |t          || j
                            }t=          |                                          }
d
 t?          ||
          D             }|                      ||          \  }}|                     || j                  }nt=          |                                          }
| j        J tC          | j                  }t          |t          || j
                            }| j        |                     |          }d |
D             }| j        | "                    ||           d t?          ||
          D             }|                      ||          \  }}| #                    |||          \  }}|||fS )NFr0   c                $    i | ]\  }}|v 	||S r6   r6   )r7   kvcolumnss      r:   rk   z'CParserWrapper.read.<locals>.<dictcomp>   s$    NNNTQgAqr<   z file structure not yet supportedz,Could not construct index. Requested to use z number of columns, but z left to parse.T)try_parse_datesc                     i | ]\  }\  }}||S r6   r6   r7   ry   r8   rz   s       r:   rk   z'CParserWrapper.read.<locals>.<dictcomp>.  "    @@@YQAAq@@@r<   c                    g | ]
}|d          S )   r6   r7   rj   s     r:   r;   z'CParserWrapper.read.<locals>.<listcomp>F  s    ///qt///r<   c                     i | ]\  }\  }}||S r6   r6   r~   s       r:   rk   z'CParserWrapper.read.<locals>.<dictcomp>J  r   r<   )$r"   r#   read_low_memory_concatenate_chunksreadStopIteration_first_chunkr   rQ   r   rB   _get_empty_metar0   _maybe_make_multi_index_columnsrM   r*   _filter_usecolsitemsrf   rI   r\   r[   NotImplementedErrorrW   r   rO   rA   _maybe_parse_datesappendr   sortedzip_do_date_conversionsrN   _check_data_length_make_index)r`   rs   chunksdatarI   indexcol_dictarraysr8   values	data_tupscolumn_names	date_dataalldatar{   s                 @r:   r   zCParserWrapper.read   s*   	 055e<<*622 |((// 	 	 	  $)!#O,T_dnMM  ,0+?+?* ,@ , ,(w >>wWW<+"227;;GNNNNX^^-=-=NNNgx//// 

-	2 " 
<$ ?	N) N)*LMMM F~ $,";s4>?R?R"R"R!B4>**B B|0B B B   4<455 & &>)!XXa[[FF!XXdnQ&788F00D0QQf%%%%/77E|',,U33'?t~'V'VWWE tzz||,,I@@#eY*?*?@@@D&*&?&?t&L&L#L)  ??dn LL tzz||,,I
 ?...))E'?t~'V'VWWE|',,U33 0/Y///G|#''w777@@#eY*?*?@@@D#88EEE9"&"2"29gu"M"ME<lI--s   AA CD(D(rI   Sequence[Hashable]c                    |                      | j        |          ;t          |          t                    k    rfdt          |          D             }|S )Nc                *    g | ]\  }}|v s|v |S r6   r6   )r7   r8   namer*   s      r:   r;   z2CParserWrapper._filter_usecols.<locals>.<listcomp>U  s2        DWPWr<   )rR   r*   rW   rX   )r`   rI   r*   s     @r:   r   zCParserWrapper._filter_usecolsQ  sm    ((u==3u::W#=#=   $-e$4$4  E r<   Tr   intr|   c                    |r@|                      |          r+|                     || j        | j        |         nd           }|S )N)rr   )_should_parse_dates
_date_convrL   )r`   r   r   r|   s       r:   r   z!CParserWrapper._maybe_parse_datesZ  sZ     	t77>> 	__/3/?/KD$U++QU %  F r<   )r$   r%   r&   r'   )r&   r'   re   )rs   rt   r&   ru   )rI   r   r&   r   )T)r   r   r|   r!   )__name__
__module____qualname____annotations__r>   rf   rZ   r   r   r   __classcell__)rc   s   @r:   r    r    8   s         D= D= D= D= D= D=L   , , , ,. !q. q. q. q. q.f           r<   r    r   list[dict[int, ArrayLike]]r&   dictc                  
 t          | d                                                   }g }i }|D ]Պ

fd| D             }d |D             }d |D             }|                                }t          |t                    rt          |d          |
<   ft          |          |
<   t          |          dk    rJ|
         j        t          j        t                    k    r"|                    t          
                     |rWd                    |          }d	                    d
| dg          }	t          j        |	t           t#                                 |S )z
    Concatenate chunks of data read with low_memory=True.

    The tricky part is handling Categoricals, where different chunks
    may have different inferred categories.
    r   c                :    g | ]}|                               S r6   )rA   )r7   chunkr   s     r:   r;   z'_concatenate_chunks.<locals>.<listcomp>o  s#    444E		$444r<   c                    h | ]	}|j         
S r6   rw   )r7   as     r:   	<setcomp>z&_concatenate_chunks.<locals>.<setcomp>q  s    (((a!'(((r<   c                <    h | ]}t          |t                    |S r6   )
isinstancer   r   s     r:   r   z&_concatenate_chunks.<locals>.<setcomp>r  s(    SSS:aAQ3R3RS!SSSr<   F)sort_categoriesr   , z	Columns (zK) have mixed types. Specify dtype option on import or set low_memory=False.)
stacklevel)rN   keysrA   r   r   r   r   rW   r0   npobjectr   strjoinwarningswarnr	   r
   )r   rI   warning_columnsresultarrsdtypesnon_cat_dtypesr0   warning_nameswarning_messager   s             @r:   r   r   c  s{    !!""EOF 2 24444V444((4(((SSVSSS

e-.. 	2-dEJJJF4LL(..F4L>""Q&&6$<+=&AQAQ+Q+Q&&s4yy111 T11((KM K K K
 
 	o|@P@R@RSSSSMr<   r0   *DtypeArg | dict[Hashable, DtypeArg] | None*DtypeObj | dict[Hashable, DtypeObj] | Nonec                d    t           t                    rdt                                                     t          fd          }                                 D ]}t           |                   ||<   |S t           t
                    r fd D             S  t                     S  S )zc
    Ensure we have either None, a dtype object, or a dictionary mapping to
    dtype objects.
    c                      S re   r6   )default_dtypes   r:   <lambda>z#ensure_dtype_objs.<locals>.<lambda>  s    = r<   c                <    i | ]}|t          |                   S r6   r   )r7   ry   r0   s     r:   rk   z%ensure_dtype_objs.<locals>.<dictcomp>  s'    999a<a))999r<   )r   r   r   default_factoryr   r   )r0   dtype_convertedra   r   s   `  @r:   rD   rD     s     %%% 
#$U%:%:%<%<=='23H3H3H3H'I'I::<< 	< 	<C#/c
#;#;OC  	E4	 	  #999959999		E"""Lr<   )r   r   r&   r   )r0   r   r&   r   )1
__future__r   collectionsr   typingr   r   r2   r   pandas._libsr   r   pandas.compat._optionalr   pandas.errorsr	   pandas.util._exceptionsr
   pandas.core.dtypes.commonr   pandas.core.dtypes.concatr   r   pandas.core.dtypes.dtypesr   pandas.core.indexes.apir   pandas.io.commonr   r   pandas.io.parsers.base_parserr   r   r   collections.abcr   r   r   pandas._typingr   r   r   r   pandasr   r   r    r   rD   r6   r<   r:   <module>r      sp   " " " " " " # # # # # #                         ? > > > > > & & & & & & 4 4 4 4 4 4 2 2 2 2 2 2        7 6 6 6 6 6 ? ? ? ? ? ?                                             h h h h hZ h h hV	" " " "J     r<   