
    bMh)                         d Z ddlmZmZ ddlZddlZddlmc m	Z
 ddlmZ ddlmZ ddlmZ ddlmZ ddlmZmZ ddlmZ  G d	 d
          Zd ZdS )z\
Tests the TextReader class in parsers.pyx, which
is integral to the C engine in parsers.py
    )BytesIOStringION)
TextReader)ParserWarning)	DataFrame)TextFileReaderread_csv)ensure_dtype_objsc                      e Zd Zej        d             Zd Zd Zd Zd Z	d Z
d Zd Zd	 Zd
 Zd Zd Zd Zd Zd Zd Zd Zd Zd Zd Zej                            ddddifdddifdddifdddifdddifdddifg          d              Zd! Zej                            d" ed#                    d$             Zd% Zd&S )'TestTextReaderc                      |dddd          S )Niodatacsvz	test1.csv )selfdatapaths     f/var/www/html/test/jupyter/venv/lib/python3.11/site-packages/pandas/tests/io/parser/test_textreader.pycsv_pathzTestTextReader.csv_path   s    xfe[999    c                     t          |d          5 }t          |          }|                                 d d d            d S # 1 swxY w Y   d S )Nrbopenr   readr   r   freaders       r   test_file_handlezTestTextReader.test_file_handle    s    (D!! 	Q]]FKKMMM	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	s   $AA	Ac                     t          |d          5 }t          |d           }|                                 d d d            d S # 1 swxY w Y   d S Nr   headerr   r   s       r   test_file_handle_mmapz$TestTextReader.test_file_handle_mmap%   s    (D!! 	Q$///FKKMMM	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	s   &AAAc                     t          |d          5 }|                                }d d d            n# 1 swxY w Y   t          |          }t          |d           }|                                 d S r!   )r   r   r   r   )r   r   r   textsrcr   s         r   test_StringIOzTestTextReader.test_StringIO+   s    (D!! 	Q6688D	 	 	 	 	 	 	 	 	 	 	 	 	 	 	dmmC---s   266c           	          d}t          t          |          d           }|                                }t          t	          t          t          |d                                       dk    sJ d S )Nz	a
b
a
b
ar"   r      )r   r   r   lensetmapidr   r   r   results       r   test_string_factorizez$TestTextReader.test_string_factorize2   sa    HTNN48883s2vay))**++q000000r   c                 \   d}t          t          |          dd           }|                                }t          j        |d         t          j        g dt
          j                             t          j        |d         t          j        g dt
          j                             d S )	Nza,   b
a,   b
a,   b
a,   bT)skipinitialspacer#   r   )ar4   r4   r4   dtype   )br8   r8   r8   r   r   r   tmassert_numpy_array_equalnparrayobject_r/   s       r   test_skipinitialspacez$TestTextReader.test_skipinitialspace9   s    /HTNNT$OOO
#1Irx 4 4 4BJGGG	
 	
 	
 	#1Irx 4 4 4BJGGG	
 	
 	
 	
 	
r   c                     d}t          t          |          d           }|                                }|d         j        t          j        k    sJ d S )NzTrue
False
True
Truer"   r   )r   r   r   r6   r<   bool_r/   s       r   test_parse_booleansz"TestTextReader.test_parse_booleansF   sK    (HTNN4888ay"(******r   c                 \   d}t          t          |          dd           }|                                }t          j        |d         t          j        g dt
          j                             t          j        |d         t          j        g dt
          j                             d S )	Nza  b
a		 "b"
"a"	 	 bT)delim_whitespacer#   r   )r4   r4   r4   r5   r7   )r8   r8   r8   r9   r/   s       r   test_delimit_whitespacez&TestTextReader.test_delimit_whitespaceN   s    ,HTNNT$OOO
#1IrxrzBBB	
 	
 	
 	#1IrxrzBBB	
 	
 	
 	
 	
r   c                     d}t          t          |          d           }|                                }t          j        g dt          j                  }t          j        |d         |           d S )Nza
"hello
there"
thisr"   )r4   zhello
therethisr5   r   )r   r   r   r<   r=   r>   r:   r;   r   r   r   r0   expecteds        r   test_embedded_newlinez$TestTextReader.test_embedded_newline[   sf    (HTNN48888999LLL
#F1Ix88888r   c                     d}t          t          |          ddd           }|                                }t          j        ddg          }t          j        |d         |           d S )Nz12345,67
345,678:,)	delimiterdecimalr#   g)\@g+ٚu@r   )r   r   r   r<   r=   r:   assert_almost_equalrH   s        r   test_euro_decimalz TestTextReader.test_euro_decimald   sa    "HTNNc3tTTT8Xw/00
vay(33333r   c                     d}t          t          |          ddd           }|                                }t          j        ddgt          j                  }t          j        |d         |           d S )	Nz123,456
12,500rL   rM   rN   	thousandsr#   @ 0  r5   r   )r   r   r   r<   r=   int64r:   rP   rH   s        r   test_integer_thousandsz%TestTextReader.test_integer_thousandsm   sh     HTNNcSQUVVV8VUO28<<<
vay(33333r   c                     d}t          t          |          ddd           }|                                }t          ddg          }t	          j        ||           d S )Nz123.456
12.500rL   .rS   rU   rV   )r   r   r   r   r:   assert_frame_equalrH   s        r   test_integer_thousands_altz)TestTextReader.test_integer_thousands_altv   sc     TNNcS
 
 
 fe_--
fh/////r   c                    d}t          t          |          dd           }d}t          j        t          j        |          5  |                                 d d d            n# 1 swxY w Y   t          t          |          dd d          }|                                }t          j        g dt          	          t          j        g d
t          	          t          j        g dt          	          d}t          ||           t          j        t          d          5  t          t          |          dd d          }|                                 d d d            d S # 1 swxY w Y   d S )Nz'a:b:c
d:e:f
g:h:i
j:k:l:m
l:m:n
o:p:q:rrL   rN   r#   zCError tokenizing data\. C error: Expected 3 fields in line 4, saw 4)matchr*   )rN   r#   on_bad_lines)r4   dglr5   )r8   ehm)cr   inr   r7   r*   zSkipping liner7   )r   r   pytestraisesparserParserErrorr   r<   r=   objectassert_array_dicts_equalr:   assert_produces_warningr   )r   r   r   msgr0   rI   s         r   test_skip_bad_linesz"TestTextReader.test_skip_bad_lines   s   =HTNNc$GGGT]6-S999 	 	KKMMM	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 TNNc$Q
 
 
 x,,,F;;;x,,,F;;;x,,,F;;;
 

 	!222'_MMM 	 	#d  F KKMMM		 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	s$   A%%A),A)&5E((E,/E,c                    d}t          t          |          dd          }|j        }g dg}||k    sJ |                                }t	          j        ddgt          j                  t	          j        dd	gt          j                  t	          j        d
dgt          j                  d}t          ||           d S )Nz%skip this
skip this
a,b,c
1,2,3
4,5,6rM   r*   r^   r4   r8   rg   r7      r5            rj   )r   r   r#   r   r<   r=   rW   rp   )r   r   r   r#   rI   recss         r   test_header_not_enough_linesz+TestTextReader.test_header_not_enough_lines   s    :HTNNc!DDD#OO$!!!!{{}}xAbh///xAbh///xAbh///
 

 	!x00000r   c                     d}t          t          |          dd d          }|                                }dt          j        dgdz  t
                    i}t          ||           d S )	Nz,\"hello world"
\"hello world"
\"hello world"rM   \)rN   r#   
escapecharr   z"hello world"rx   r5   )r   r   r   r<   r=   ro   rp   rH   s        r   test_escapecharzTestTextReader.test_escapechar   sh    BHTNNc$SWXXXrx 1A 5VDDDE 22222r   c                     d S Nr   r   s    r   test_eof_has_eolzTestTextReader.test_eof_has_eol   s    r   c                     d S r   r   r   s    r   test_na_substitutionz#TestTextReader.test_na_substitution   s    r   c                    dfd} |d          }|                                 }|d         j        dk    sJ t          j        g dd          }|d         |k                                    sJ |d         j        d	k    sJ  |d
          }|                                 }|d         j        d
k    sJ t          j        g dd
          }|d         |k                                    sJ |d         j        d
k    sJ d S )Nza,1
aa,2
aaa,3
aaaa,4
aaaaa,5c                  v    d| v rt          | d                   | d<   t          t                    fdd d| S )Nr6   rM   r^   r
   r   r   kwdsr   s    r   _make_readerz<TestTextReader.test_numpy_string_dtype.<locals>._make_reader   sD    $ 1$w- @ @WhtnnQDQQDQQQr   zS5,i4r5   r   S5)r4   aaaaaaaaaaaaaar7   i4S4)r4   r   r   r   r   )r   r6   r<   r=   all)r   r   r   r0   	ex_valuesr   s        @r   test_numpy_string_dtypez&TestTextReader.test_numpy_string_dtype   sB   	R 	R 	R 	R 	R
 G,,,ay$&&&&H@@@MMM	q	Y&++-----ay$&&&&D)))ay$&&&&H???tLLL	q	Y&++-----ay$&&&&&&r   c                 2   dfd} |ddd          }|                                 }|d         j        dk    sJ |d         j        dk    sJ  |t          j        t          d          }|                                 }|d         j        dk    sJ |d         j        d	k    sJ  |t          j        d          t          j        d	          d          }|                                 }|d         j        dk    sJ |d         j        d	k    sJ d S )
Nzone,two
1,a
2,b
3,c
4,dc                  t    d| v rt          | d                   | d<   t          t                    fddi| S )Nr6   rN   rM   r   r   s    r   r   z4TestTextReader.test_pass_dtype.<locals>._make_reader   sB    $ 1$w- @ @WhtnnDDDtDDDr   u1S1)oner7   r5   r   r7   O)r   r6   r<   uint8ro   )r   r   r   r0   r   s       @r   test_pass_dtypezTestTextReader.test_pass_dtype   sA   	E 	E 	E 	E 	E
 DT$:$:;;;ay$&&&&ay$&&&&BH$@$@AAAay$&&&&ay#%%%%BHTNNrx}}$M$MNNNay$&&&&ay#%%%%%%r   c                 P   dfd} |d          }|                                 } |                                             }t          |          dk    sJ |d         |d         k                                    sJ |d         |d         k                                    sJ d S )Nz a,b,c
1,2,3
4,5,6
7,8,9
10,11,12c                  <    t          t                    fddi| S )NrN   rM   )r   r   r   s    r   r   z1TestTextReader.test_usecols.<locals>._make_reader   s$    htnnDDDtDDDr   )r7   r*   )usecolsr*   r7   )r   r+   r   )r   r   r   r0   expr   s        @r   test_usecolszTestTextReader.test_usecols   s    	E 	E 	E 	E 	E f---lnn!!##6{{aq	SV#((*****q	SV#((*******r   ztext, kwargsz a,b,c1,2,34,5,67,8,910,11,12rN   rM   z*a  b  c1  2  34  5  67  8  910  11  12rD   Tz a,b,c1,2,34,5,6,88,910,11,12zdA,B,C,D,E,F,G,H,I,J,K,L,M,N,OAAAAA,BBBBB,0,0,0,0,0,0,0,0,0,0,0,0,0,BBBBB,0,0,0,0,0,0,0,0,0,0,0,0,0zA  B  C  2  34  5  6zA B C2 34 5 6c                    |                     dd          }t          t          |          fi |                                }t          t          |          fi |                                }t	          ||           d S )Nz
)replacer   r   r   rp   )r   r&   kwargs	nice_textr0   rI   s         r   test_cr_delimitedz TestTextReader.test_cr_delimited  s}    , LLv..	HTNN55f55::<<hy11<<V<<AACC 22222r   c                 B   d}t          t          |          d                                          }t          j        ddgt          j                  t          j        ddgt                    t          j        d	dgt                    d
}t          ||           d S )Nza,b,c
1,2,3
4,,rM   )rN   r7   rv   r5   2 3rj   )r   r   r   r<   r=   rW   ro   rp   )r   r   r0   rI   s       r   test_empty_field_eofz#TestTextReader.test_empty_field_eof!  s    "HTNNc:::??AA xAbh///xb	000xb	000
 

 	!22222r   repeat
   c           	         t          dgt          j        ggdgddg          }t          g dg dgt          d          ddg          }t          g dd	t          j        t          j        t          j        gg d
ddt          j        t          j        ggt          d          g d          }t	          t          d          ddgd          }t          j        ||           t	          t          d          t          d          d          }t          j        ||           t	          t          d          t          d          d          }t          j        ||           d S )Nr8   r4   rg   )columnsindex)r7   r7   r7   r   abcdr7   )r7   r*   rx   rv   ry   )   	   r            )r   rw         za,b
c
r   )skiprowsnamesenginez1,1,1,1,0
1,1,1,1,0


)r   r   z"0,1,2,3,4
5,6
7,8,9,10,11
12,13,14)r   r<   nanlistr	   r   r:   r[   )r   r   r4   r8   rg   dfs         r   #test_empty_field_eof_mem_access_bugz2TestTextReader.test_empty_field_eof_mem_access_bug-  sm    urvh'#sCjIII|||\\\2DLLQRTUPVWWWBFBFBF+R(	 LL--	
 	
 	
 hz**QseCPPP
b!$$$122$v,,s
 
 
 	b!$$$<==v,,
 
 

 	b!$$$$$r   c                     t          t                      dd g d          5 }t          |t                    sJ 	 d d d            d S # 1 swxY w Y   d S )N   ru   )	chunksizer#   r   )r	   r   
isinstancer   )r   r   s     r   test_empty_csv_inputz#TestTextReader.test_empty_csv_inputL  s    JJ"T
 
 
 	2b.111111	2 	2 	2 	2 	2 	2 	2 	2 	2 	2 	2 	2 	2 	2 	2 	2 	2 	2s   AAAN) __name__
__module____qualname__rk   fixturer   r   r$   r(   r1   r?   rB   rE   rJ   rQ   rX   r\   rs   r{   r   r   r   r   r   r   markparametrizer   r   ranger   r   r   r   r   r   r      s6       ^: : ^:  
    1 1 1
 
 
+ + +
 
 
9 9 94 4 44 4 4	0 	0 	0  41 1 1 3 3 3    ' ' ':& & &8+ + +& [3k35GH@#T* 4k35GH7 c" (*<d)CD #5t"<=!	
 *3 3+ *3
3 
3 
3 [XuuRyy11% % 21%<2 2 2 2 2r   r   c                     |                                  D ]D\  }}t          j        t          j        |          t          j        ||                              Ed S r   )itemsr:   r;   r<   asarray)leftrightkvs       r   rp   rp   T  sW    

 I I1
#BJqMM2:eAh3G3GHHHHI Ir   )__doc__r   r   r   numpyr<   rk   pandas._libs.parsers_libsparsersrm   r   pandas.errorsr   pandasr   pandas._testing_testingr:   pandas.io.parsersr   r	   "pandas.io.parsers.c_parser_wrapperr
   r   rp   r   r   r   <module>r      s=          
      % % % % % % % % % + + + + + + ' ' ' ' ' '                    A @ @ @ @ @v2 v2 v2 v2 v2 v2 v2 v2r	I I I I Ir   