
    z-Phʋ                     .   d dl Z d dlmZ d dlZd dlZd dlmZ d dlmZ d dl	Z	d dl
Zd dl
mZ d dlmZ d dlmZmZmZ 	 d dlmZ d dlmZmZ n# e$ r dZY nw xY w	 d dlZd dlmZ d d	lmZ d d
lm Z  n# e$ r dxZZY nw xY w	 d dl!Z"n# e$ r dZ"Y nw xY we	j#        j        Z$d Z%d Z&e	j#        j        d             Z'e	j#        j        d             Z(e	j#        j        d             Z)e	j#        j        d             Z*e	j#        j        d             Z+d Z,d Z-e	j#        j.        d             Z/e	j#        j        d             Z0e	j#        j        d             Z1d Z2e	j#        j        d             Z3d Z4e	j#        5                    dd ej6                    g          e	j#        5                    dd          d                         Z7d Z8d Z9d  Z:d! Z;d" Z<d# Z=d$ Z>d% Z?e	j#        j        d&             Z@e	j#        j        d'             ZAe	j#        j        d(             ZBd) ZCd* ZDe	j#        j        d+             ZEd, ZFe	j#        j        e	j#        jG        e	j#        H                    d-          e	j#        H                    d.          d/                                                 ZIe	j#        5                    d0d1 d2 d3 d4 g          e	j#        5                    d5d6d7g          d8                         ZJd9 ZKd: ZLd; ZMd< ZNd= ZOd> ZPe	j#        jQ        d?             ZRdS )@    N)OrderedDict)copytree)Decimal)fs)util)_check_roundtrip_roundtrip_table_test_dataframe)_read_table_write_table)dataframe_with_lists)alltypes_samplec                 X   t          j        dg di          }t          j        t          d          5  t          || dz  d           d d d            n# 1 swxY w Y   t          j        t          d          5  t          || dz  d	           d d d            d S # 1 swxY w Y   d S )
Na         z"Unsupported Parquet format versionmatchztest_version.parquetz2.2versionz%Unsupported Parquet data page version)data_page_version)patablepytestraises
ValueErrorr   )tempdirr   s     `/var/www/html/test/jupyter/venv/lib/python3.11/site-packages/pyarrow/tests/parquet/test_basic.pytest_parquet_invalid_versionr!   ;   sW   Hc999%&&E	z)M	N	N	N M MUG&<<eLLLLM M M M M M M M M M M M M M M	z *! 
" 
" 
" . .UG&<<',	. 	. 	. 	.. . . . . . . . . . . . . . . . . .s#   AAA<BB#&B#c                      t          j        g ddz            } t           j                            | gdg          }ddg}|D ]}t	          ||           d S )Nr   i f0namesi   i   )data_page_size)r   arrayTablefrom_arraysr   )arrt
page_sizestarget_page_sizes       r    test_set_data_page_sizer.   E   su    
(999v%
&
&C
cU4&11A 7#J& = =+;<<<<<= =    c                      t          d          } t          j                            | d          }t	          |ddd           d S )Nd   Fpreserve_index
   r   2.4)r&   write_batch_sizer   )r
   r   r(   from_pandasr   dfr   s     r    test_set_write_batch_sizer:   O   sS    			BH  E ::Eb1e     r/   c                     t          d          } t          j                            | d          }t	          |ddd           t          j        t                    5  t	          |ddd           d d d            d S # 1 swxY w Y   d S )	Nr1   Fr2   r   r4   r5   )dictionary_pagesize_limitr&   r   r   )r
   r   r(   r7   r   r   r   	TypeErrorr8   s     r    "test_set_dictionary_pagesize_limitr>   Y   s    			BH  E ::EUa$&7 7 7 7 
y	!	! ; ;#(*E	; 	; 	; 	;; ; ; ; ; ; ; ; ; ; ; ; ; ; ; ; ; ;s   A>>BBc            	         g } t           j                            t          d                    }|                     t           j                            |gdz                       t                      \  }}t           j                            |          }|                     t           j                            |gdz                       dD ]}dD ]}| D ]}t          |d||            d S )Nr4   sizer   )z1.0z2.0)TF2.6)r   r   use_dictionary)	r   RecordBatchr7   r   appendr(   from_batchesr   r   )tablesbatchr9   _r   rC   r   s          r    test_chunked_table_writerJ   f   s    FN&&B'?'?'?@@E
MM"(''!44555 ""EBN&&r**E
MM"(''!44555+ 3 3+ 	3 	3N 3 3 5&7#13 3 3 3 33	33 3r/   c                 ~   t          d          }t          j                            |          }t	          |ddid           t          | dz            }t          |d          5 }t          ||d	           d d d            n# 1 swxY w Y   t          j	        |d
          }|
                    |          sJ d S )Nr4   r@   
memory_mapTrB   read_table_kwargsr   tmp_filewbr   )rL   r   r   r(   r7   r   stropenr   pqread_pandasequalsr   r9   r   filenamef
table_reads         r    test_memory_mapr[   y   s	   	b	!	!	!BH  $$EU|T.B"$ $ $ $ 7Z'((H	h		 .UAu----. . . . . . . . . . . . . . .T:::JU#######   &BB	B	c                 ~   t          d          }t          j                            |          }t	          |ddid           t          | dz            }t          |d          5 }t          ||d	           d d d            n# 1 swxY w Y   t          j	        |d
          }|
                    |          sJ d S )Nr4   r@   buffer_sizei  rB   rM   rO   rP   r   i   )r^   rQ   rW   s         r    test_enable_buffered_streamr_      s	   	b	!	!	!BH  $$EU}d.C"$ $ $ $ 7Z'((H	h		 .UAu----. . . . . . . . . . . . . . .d;;;JU#######r\   c                 v   t           j                            t          j        dg          gdg          }d}| |z  }|                                rJ t          |t          |                     |                                sJ t          t          |                    }|                    |          sJ d S )N*   intsz	foo # bar)	r   r(   r)   r'   existsr   rR   r   rV   )r   r   rX   pathrZ   s        r    test_special_chars_filenamere      s    H  "(B4..!1F8<<EHXD{{}}D		""";;==SYY''JU#######r/   c                  $   t          j        t          d          5  t          j        d            d d d            n# 1 swxY w Y   t          j        t          d          5  t          j        d            d d d            d S # 1 swxY w Y   d S )NNoner   )r   r   r=   rT   
read_tableParquetFile r/   r    test_invalid_sourcerk      s	    
y	/	/	/  
d               
y	/	/	/  
t                 s!   =AA#BB	B	c                      t          j        t          t          d                    gdg          } t	          | d           d S )Ni@  r#   r$   r   )row_group_size)r   r   listranger   )r+   s    r    (test_file_with_over_int16_max_row_groupsrp      sD     	$uU||$$%dV444AQq))))))r/   c                     t          d          } t          j                            |           }t          j                            d |                                D             |j        j                  }|j                            d          j	        t          j
                    k    sJ |j                            d          j	        t          j        t          j
                              k    sJ t          |d           d S )	Nr4   r@   c                 H    g | ]}|                     d           dd           S )r   N)chunk).0cols     r    
<listcomp>z.test_empty_table_roundtrip.<locals>.<listcomp>   s+    999c1bqb	999r/   r$   null	null_listrB   r   )r   r   r(   r7   r)   itercolumnsschemar%   fieldtyperw   list_r   r8   s     r    test_empty_table_roundtripr~      s    	b	!	!	!B H  $$EH  99U%6%6%8%8999l  ! " "E <f%%*bgii7777<k**/28BGII3F3FFFFFu     r/   c                      t          j                    } t          j                            | d          }t          |           d S )NFr2   )pd	DataFramer   r(   r7   r   )r9   emptys     r    test_empty_table_no_columnsr      s;    	BH  E ::EUr/   c                  t   t          t          j                    t          j        t          j                                        g t          dd          gg} fd| D             }fd|D             }t          j                            |t          j                            }t          |           d S )N)int32list_stringr   )Gc                     g | ]<}t          j        |t          j                                                             =S )r|   )r   r'   structflattenrt   rH   colss     r    rv   zEtest_write_nested_zero_length_array_chunk_failure.<locals>.<listcomp>   sL     $ $ $ %bioo666>>@@ $ $ $r/   c                 v    g | ]5}t           j                            |t          j                             6S ))rz   )r   rD   r)   rz   r   s     r    rv   zEtest_write_nested_zero_length_array_chunk_failure.<locals>.<listcomp>   sF     * * * .,,U29T??,KK * * *r/   )	r   r   r   r}   stringr(   rF   rz   r   )data	my_arrays
my_batchestblr   s       @r    1test_write_nested_zero_length_array_chunk_failurer      s    hjjHRY[[))  D 1&999<=D$ $ $ $"$ $ $I* * * *(* * *J
(


BIdOO
<
<CSr/   c                 $   | dz  }t          j        dt          j        dt          j                  i          }t          ||           t          |          }|                                }t          j	        ||           t          |           dz   }t          j        dt          j        dt          j                  i          }t          ||           t          |          }|                                }t          j	        ||           d S )Nzzzz.parquetxr4   dtype)r   r   nparangeint64r   r   	to_pandastmassert_frame_equalrR   )r   rd   r9   rZ   df_reads        r    test_multiple_path_typesr      s     ]"D	sBIb999:	;	;BTT""J""$$G"g&&& w<<-'D	sBIb999:	;	;BTT""J""$$G"g&&&&&r/   c                 |   | dz  }t          j        dg di          }t          ||           t          j        |          }t          |          }|                    |          sJ t          j        t                    5  t          |t          j                               d d d            d S # 1 swxY w Y   d S )Ntest.parquetr   r   
filesystem)r   r   r   r   FSProtocolClassr   rV   r   r   r=   r   
FileSystem)r   rd   r   fs_protocol_objresults        r    test_fspathr      s   ^#DHc999%&&E*400O))F== 
y	!	! A AO@@@@A A A A A A A A A A A A A A A A A As   #B11B58B5r   name)data.parquetu   例.parquetc                 f   t          j        dg di          }| |z  }t          j        |t	          |                     t          j        |           5  t          j        ||          }d d d            n# 1 swxY w Y   |                    |          sJ |	                                 |
                                rJ t          j        |           5  t          j        |||           d d d            n# 1 swxY w Y   t          j        |          }|                    |          sJ d S )Nr   r   r   )r   r   rT   write_tablerR   r   
change_cwdrh   rV   unlinkrc   )r   r   r   r   rd   r   s         r    test_relative_pathsr     s    Hc999%&&ET>D N5#d))$$$		!	! < <t
;;;< < < < < < < < < < < < < < <==KKMMM{{}} 
	!	! ; ;
udz::::; ; ; ; ; ; ; ; ; ; ; ; ; ; ;]4  F==s$   A77A;>A;C;;C?C?c                      t          j        t                    5  t          j        d           d d d            d S # 1 swxY w Y   d S )Nzi-am-not-existing.parquet)r   r   FileNotFoundErrorrT   rh   rj   r/   r    test_read_non_existing_filer   &  s    	(	)	) 3 3
12223 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3s   <A A c                       G d dt           j                  } t          j        t          d          5  t          j         | d                     d d d            d S # 1 swxY w Y   d S )Nc                       e Zd Zd Zd ZdS )3test_file_error_python_exception.<locals>.BogusFilec                      t          d          NzorglubZeroDivisionErrorselfargss     r    readz8test_file_error_python_exception.<locals>.BogusFile.read.      #I...r/   c                      t          d          r   r   r   s     r    seekz8test_file_error_python_exception.<locals>.BogusFile.seek1  r   r/   N)__name__
__module____qualname__r   r   rj   r/   r    	BogusFiler   -  s2        	/ 	/ 	/	/ 	/ 	/ 	/ 	/r/   r   r   r   r/   )ioBytesIOr   r   r   rT   rh   )r   s    r     test_file_error_python_exceptionr   ,  s    / / / / /BJ / / / 
(		:	:	: & &
iinn%%%& & & & & & & & & & & & & & & & & &s   AA#&A#c                 0   t          j        dg di          }t          j        |t	          | dz                       t          t	          | dz            d          5 }t          j        |          }d d d            n# 1 swxY w Y   |                    |          sJ t          t	          | dz            d          5 }t          j        t          j        |                    }d d d            n# 1 swxY w Y   |                    |          sJ d S )Nr   r   r   rb)	r   r   rT   r   rR   rS   rh   rV   
PythonFile)r   r   rY   r   s       r    test_parquet_read_from_bufferr   9  su   Hc999%&&EN5#g677888	c'N*++T	2	2 "aq!!" " " " " " " " " " " " " " "==	c'N*++T	2	2 1ar}Q//001 1 1 1 1 1 1 1 1 1 1 1 1 1 1==s$   A??BB'C44C8;C8c            
      ^   t          j        t          t          t          t          d                                        } t          j        t          t          t          t          d                                        }t          j        ddgdz            }| | g}t           j                            |ddg          }t          ||ddd	           t          ||ddgdg	           t          ||dddgddg	           t           j                            | | ||gg d
          }t          ||ddgddg           t           j                            |gdg          }t          j        t          d          5  t          ||dd           d d d            d S # 1 swxY w Y   d S )Nr1   TF2   r   br$   gzip)expectedcompressionrC   use_byte_stream_splitr   r   cdr   r   )r   rC   r   tmpBYTE_STREAM_SPLIT only supportsr   )r   r   rC   )r   r'   rn   mapfloatro   intr(   r)   r   r   r   IOError)	arr_floatarr_intarr_bool
data_floatr   mixed_tables         r    test_byte_stream_splitr   G  s%   c%s445566IhtCU3ZZ001122Gxu*++HY'JH  C: >>E UU$)G G G G
 UU%(E,/52 2 2 2
 UU%(#J,/:7 7 7 7
 (&&	9gw'O-A-A-A ' C CK[;%(#J,/:7 7 7 7 H  (E7 ;;E	w&G	H	H	H / /d(-	/ 	/ 	/ 	// / / / / / / / / / / / / / / / / /s   F""F&)F&c           
         t          j        t          t          t          t          d                              t          j        dd                    }t          j        t          t          t          t          d                              t          j        dd                    }t          j        t          t          t          t          d                              t          j        dd                    }t          j        dd	gd
z            }|||g}t           j                            |g d          }t          ||dd	d           t          j                            | d          }t          j        ||dd	d           t          j        |          }|j                            d          }	|j                            d          }
|	j        dk    sJ |
j        dk    sJ t          ||dd	dddd           t           j                            ||||gg d          }t          ||d	d           d S )Nr1      r   r      	      TFr   r   r   r   r$   r   )r   r   rC   store_decimal_as_integerr   )r   rC   r   r   r   INT32INT64DELTA_BINARY_PACKEDr   r   )r   r   rC   r   column_encodingr   )r   rC   r   )r   r'   rn   r   r   ro   
decimal128r(   r)   r   osrd   joinrT   r   ri   rz   columnphysical_type)r   arr_decimal_1_9arr_decimal_10_18arr_decimal_gt18r   data_decimalr   pqtestfile_path
pqtestfilepqcol_decimal_1_9pqcol_decimal_10_18r   s               r    test_store_decimal_as_integerr   m  sj   htCs$<$<==$&M!Q$7$79 9 9Oc'5::&>&>!?!?&(mB&:&:< < <xS%**%=%= > >%']2q%9%9; ; ;xu*++H#%68HILH  ___ EEE U#!'$).2	4 4 4 4 gll7N;;ON5/%"',02 2 2 2
 00J")0033$+22155*g5555,7777 U#!'$).233& &    (&&	+-=xH""" ' $ $K [)$).24 4 4 4 4 4r/   c                     t          j        t          t          t          t          d                                        } t          j        t          t          t          t          d                                        }t          j        d t          d          D             t          j                              }t          j        d t          d          D             t          j        d                    }t          j        g ddz            }t           j        	                    | ||||gg d	          }t          ||d
ddddd           t          ||d
d           t          ||d
dddd           t          ||d
dddd           t          ||d
ddddd           t          ||d
ddi           t          j        t          d          5  t          ||d
dddd           d d d            n# 1 swxY w Y   t          j        t          d          5  t          ||d
dddd           d d d            n# 1 swxY w Y   t          j        t          d          5  t          ||d
d           d d d            n# 1 swxY w Y   t          j        t          d          5  t          ||d
ddi           d d d            n# 1 swxY w Y   t          j        t                    5  t          ||dgddi           d d d            n# 1 swxY w Y   t          j        t                    5  t          ||ddi           d d d            n# 1 swxY w Y   t          j        t                    5  t          ||d
dgdddd            d d d            n# 1 swxY w Y   t          j        t                    5  t          ||d
d!dddd            d d d            n# 1 swxY w Y   t          j        t                     5  t          ||d
d!           d d d            d S # 1 swxY w Y   d S )"Nr1   c                 ,    g | ]}t          |          S rj   )rR   rt   r   s     r    rv   z(test_column_encoding.<locals>.<listcomp>  s    3331A333r/   r   c                 R    g | ]$}t          |                              d           %S )r4   )rR   zfillr   s     r    rv   z(test_column_encoding.<locals>.<listcomp>  s*    ...aQb		...r/   r4   )FTFF   )r   r   r   r   er$   FBYTE_STREAM_SPLITPLAINr   )r   rC   r   r   r   DELTA_LENGTH_BYTE_ARRAYDELTA_BYTE_ARRAYr  RLEr   r   )r   r   r  z)DELTA_BINARY_PACKED encoder only supportsz+'RLE_DICTIONARY' is already used by defaultRLE_DICTIONARYz/Unsupported column encoding: 'MADE_UP_ENCODING'r   MADE_UP_ENCODINGr   )r   r   )r   rC   r   r   T)r   r'   rn   r   r   ro   r   binaryr(   r)   r   r   r   r   OSErrorr   r=   )r   r   arr_binarr_flbar   r   s         r    test_column_encodingr    s   c%s445566IhtCU3ZZ001122Gh33c

333")++FFFGx..5::...RYr]]D D DHx333b899H(&&	GWh9''' ' ) )K [;u+>+>+2+>&@ &@A A A A [;$)%,. . . .
 [;$)+2+@+2&4 &45 5 5 5 [;$)+2+@+D&F &FG G G G [;$)+2+@+=+=&? &?@ @ @ @ [;$)&)5\3 3 3 3 
w>
@ 
@ 
@ E E{(-/6/6/B*D *D	E 	E 	E 	EE E E E E E E E E E E E E E E 
wH
J 
J 
J 9 9{(-/D/6/6*8 *8	9 	9 	9 	99 9 9 9 9 9 9 9 9 9 9 9 9 9 9 
zJ
L 
L 
L ; ;{(-)9	; 	; 	; 	;; ; ; ; ; ; ; ; ; ; ; ; ; ; ; 
zN
P 
P 
P D D{(-*-/A)B	D 	D 	D 	DD D D D D D D D D D D D D D D 
z	"	" 9 9{),*-w	9 	9 	9 	99 9 9 9 9 9 9 9 9 9 9 9 9 9 9 
z	"	" 9 9{*-w	9 	9 	9 	99 9 9 9 9 9 9 9 9 9 9 9 9 9 9 
z	"	" 9 9{(-03u/4/B/6*8 *8	9 	9 	9 	99 9 9 9 9 9 9 9 9 9 9 9 9 9 9 
z	"	" 9 9{(-/3/4/B/6*8 *8	9 	9 	9 	99 9 9 9 9 9 9 9 9 9 9 9 9 9 9 
y	!	! / /{(-)-	/ 	/ 	/ 	// / / / / / / / / / / / / / / / / /s   "HH
H
,III6JJJ<KK"%K"L%%L),L)	M**M.1M.N44N8;N8O==PP!QQ	Qc            
         t          j        t          t          t          t          d                                        } | | g}t           j                            |ddg          }t          ||dd           t          ||dd           t          ||dd	d
           t          ||dddd
           t          ||dd           t          ||dd           g d}t          j
                    }|D ]P\  }}t          j        t          t          f          5  t          ||||           d d d            n# 1 swxY w Y   Qd S )N  r   r   r$   r   r   )r   r   compression_levelr   snappyr   )r   r   r   r   lz4r   ))r     )r   i)rg   i  )lzo   )r   r  )r   r'   rn   r   r   ro   r(   r)   r   r   r   r   r   r   r  r   )r*   r   r   invalid_combinationsbufcodeclevels          r    test_compression_levelr  #  s   
(4Ct--..
/
/C:DH  c3Z 88E UU'(* * * *
 UU'(* * * * UU'-H!=!=? ? ? ? UU-.Q'7'79 9 9 9
 UU'(* * * * UU'(* * * *8 8 8
*,,C. 2 2]J011 	2 	2+02 2 2 2	2 	2 	2 	2 	2 	2 	2 	2 	2 	2 	2 	2 	2 	2 	22 2s   D>>E	E	c                      t          j        g d          } d}t           j                            | g|g          }t	          |ddi          }d}|j        d         j        |k    sJ d S )N)r   r   r   r   r  zprohib; ,	{}flavorspark)write_table_kwargsprohib______r   )r   r'   r(   r)   r	   rz   r   )a0r   r   r   expected_names        r     test_sanitized_spark_field_namesr%  P  sr    	///	"	"BDH  "v..Ee78KLLLF"M= M111111r/   c                  v   t          d          } t          j                            |           }t	          j                    }t          ||dd           |                    d           t          |d          }|                    d           t          |d	          }|	                    |          sJ d S )
Ni'  r@   SNAPPYrB   )r   r   r   T)use_threadsF)
r   r   r(   r7   r   r   r   r   r   rV   )r9   r   r  table1table2s        r    test_multithreaded_readr+  [  s    	e	$	$	$BH  $$E
*,,C5AAAAHHQKKK$///FHHQKKK%000F==       r/   c                     t          j        t          j        d          gg d          } t          j                            |                                           }t          j	                    }t          ||d           |                    d           t          |          }|                    |          sJ t          j        t                     5  t          ||d           d d d            d S # 1 swxY w Y   d S )Nr  )ABCD)columns)
chunk_sizer   )r   r   r   r   r   r(   r7   reset_indexr   r   r   r   r   rV   r   r   r   )r   r   r  r   s       r    test_min_chunksizer5  m  s%   <10D0D0DEEEDH  !1!1!3!344E
*,,C++++HHQKKKF==	z	"	" / /UCA..../ / / / / / / / / / / / / / / / / /s   C66C:=C:c                    t          j        t          d          t          t          dd                    t	          j        dd                              d          t	          j        ddd	
          g dt          j        t          d                    t          j        dd          t          j        ddd          t          j        ddd          d	          }t          j
                            |          }| dz  }	 t          ||d           n# t          j        $ r Y nw xY w|                                rJ d S )Nabcr   r  r      u1      @      @float64r   TFT20130101periodsz
US/Eastern)r@  tzns)r@  freq)	r   r   r   r   r  rY   ghirO   r5   r   )r   r   rn   ro   r   r   astypeCategorical
date_ranger   r(   r7   r   ArrowExceptionrc   )r   r9   pdfrX   s       r    (test_write_error_deletes_incomplete_filerL  ~  sK    
DKK q!--IaOO22488Ic3i@@@///N4;;77M*a@@@M*a-9; ; ;M*adKKK	M 	M 	
N 	
NB (

r
"
"C#H 	S(E22222           s   D D+*D+c                     d}	 t          j        |           d S # t          $ r}||j        d         v sJ Y d }~d S d }~ww xY w)Nznonexistent-file.parquetr   )rT   rh   	Exceptionr   )r   rd   r  s      r    test_read_non_existent_filerO    sj    %D!
d ! ! !qvay          !s    
A ;A c                     t          j                    5  t          j        d           t          j        | dz             d d d            d S # 1 swxY w Y   d S )Nerror)actionzv0.7.1.parquet)warningscatch_warningssimplefilterrT   rh   )datadirs    r    test_read_table_doesnt_warnrW    s    		 	"	" 2 2W----
g 001112 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2s   -AAAc                     t           j                            t          j        ddg          gdg          } t	          j                    }t          j        | |d           |                    d           t          j	        |          }t          j        |                                |                                            d S )Nr7  defsome_colr   r   r   )r   r(   r)   r'   r   r   rT   r   r   rh   r   r   r   )r   rY   	roundtrips      r    test_zlib_compression_bugr]    s     H  "(E5>":":!;j\JJE

AN5!0000FF1IIIa  I)--//1B1BCCCCCr/   c                 D   t          | dz            }t          j        t          j        t
          fd          5  t          |d          5 }	 d d d            n# 1 swxY w Y   t          j        |           d d d            n# 1 swxY w Y   t          j        t          j        t
          fd          5  t          |d          5 }|	                    d           d d d            n# 1 swxY w Y   t          j        |           d d d            d S # 1 swxY w Y   d S )Nr   zsize is 0 bytesr   rP   zsize is 4 bytess   ffff)
rR   r   r   r   ArrowInvalidr  rS   rT   rh   write)r   rd   rY   s      r    test_parquet_file_too_smallra    s   w'((D	1.
0 
0 
0  $ 		 	 	 	 	 	 	 	 	 	 	 	 	 	 	
d	               
1.
0 
0 
0  $ 	GGG	 	 	 	 	 	 	 	 	 	 	 	 	 	 	
d	                 sk   BABA	B A	!BBB6DC)D)C-	-D0C-	1DDDzignore:RangeIndex:FutureWarningz.ignore:tostring:DeprecationWarning:fastparquetc                 4   t          j        d          }t          j        t	          d          t	          t          dd                    t          j        ddd          g d	t          j        d
d          t          j	        g d          d          }t          j        |          }t          | dz            }t          j        ||d            |                    |          }|                                }t#          j        ||           t          | dz            }|                    ||           t          j        |          }|d                             t,                    |d<   t#          j        |                                |           d S )Nfastparquetr7  r   r  r:  r;  r<  r   r=  r>  r   r?  )r   r   r   )r   r   r   r   r  rY   zcross_compat_arrow.parquetr[  z cross_compat_fastparquet.parquetrY   )r   importorskipr   r   rn   ro   r   r   rI  rH  r   r   rR   rT   r   ri   r   r   r   r`  rU   rG  object)	r   fpr9   r   
file_arrowfp_filedf_fpfile_fastparquettable_fps	            r    $test_fastparquet_cross_compatibilityrl    sv   
 
	]	+	+B	eeAqkk""39555$$$z155500		
 		

 
B HRLLE W;;<<JN5*$7777nnZ((GE"e$$$ 7%GGHHHHr"""~.//H gnnV$$BsG(,,..33333r/   array_factoryc                  4    t          j        dd gdz            S Nr   r4   r   r'   rj   r/   r    <lambda>rq    s    BHaY^$$ r/   c                  X    t          j        dd gdz                                            S ro  r   r'   dictionary_encoderj   r/   r    rq  rq    s$    BHaY^$$6688 r/   c                  4    t          j        dd gdz            S N r4   rp  rj   r/   r    rq  rq    s    BHb$Z"_%% r/   c                  X    t          j        dd gdz                                            S rv  rs  rj   r/   r    rq  rq    s$    BHb$Z"_%%7799 r/   read_dictionaryFTc                    t           j                            d |             i          }t          j                    }t          j        ||d           |                    d           |rdgnd }t          j        |d|          }|j	        D ]G}|j
        \  }|                                d         }|                                |j        dz  k    sJ Hd S )	Nru   T)rC   r   F)r(  ry  r       )r   r(   from_pydictr   r   rT   r   r   rh   r1  chunksbuffers
to_pybytesrA   )rm  ry  
orig_tablebior   ru   rs   r  s           r    test_buffer_contentsr    s     %%ummoo&>??J
*,,CN:s48888HHQKKK!0:uggdOM#5*9; ; ;E } 4 4*mmooa ~~38e#3333334 4r/   c                    t          j        t          j        t          d                    gdg          }| dz  }t	          j        ||d           t	          j        |          }|                    |          sJ d S )Nr  rb   r$   zarrow-10480.pyarrow.gzGZIPr[  )r   r   r'   ro   rT   r   rh   rV   )r   r   rd   r   s       r    "test_parquet_compression_roundtripr    s|    
 HbhuQxx(()&:::E--DN5$F3333]4  F==r/   c                    t           j                            t          j        g d          gdg          }| dz  }d}t	          j        ||j                  5 }t          |          D ]}|                    |           	 d d d            n# 1 swxY w Y   t	          j	        |          }|j
        j        |k    sJ t          |          D ],}|                    |                              |          sJ -d S )Nr   r   r#   zempty_row_groups.parquetr   )r   r(   r)   r'   rT   ParquetWriterrz   ro   r   ri   metadatanum_row_groupsread_row_grouprV   )r   r   rd   
num_groupswriterrF  readers          r    test_empty_row_groupsr    sJ   H  "(2G"<"<"<!=vFFE//DJ		$	-	- &z"" 	& 	&Au%%%%	&& & & & & & & & & & & & & & & ^D!!F?)Z7777: 6 6$$Q''..u5555556 6s   (BBBc                     d gdz  }|                     dg           t          j                            |gdg          }| dz  }t	          j        ||           t	          j        |          }||k    sJ d S )Ni   r   r   zarrow-11607.parquet)rE   r   r(   r)   rT   r   rh   )r   r   r   rd   r*  s        r    test_reads_over_batchr  $  s|    6WDKK H  $(44E**DN5$]4  FF??????r/   c                    | dz  }|                     d           t          j        g dg dgddg          }t          j        ||d	z             t          j        g d
g dgddg          }t          j        ||dz             t          j        t          |                    }t          j        g dg dgddg          }||k    sJ d S )N dataset_column_order_permutationT)exist_okr   )皙?皙?333333?r   r   r$   zdata1.parquet)皙?      ?333333?)r  r   r8  zdata2.parquet)r   r   r   r  r   r8  )r  r  r  r  r  r  )mkdirr   r   rT   r   rh   rR   )r   casedata1data2r   r*  s         r     test_permutation_of_column_orderr  2  s    77DJJJHiii.sCjAAAEN5$0111HlllIII.sCjAAAEN5$0111M#d))$$EX)))5557 #J( ( (F F??????r/   c                    | dz  }t          j        t          t          d                              }d}t          j        |g|z  d t          |          D                       }t          j        ||           t          j        t          d          5  t          j
        |d|z  	           d d d            n# 1 swxY w Y   t          j        t          d          5  t          j
        ||
           d d d            n# 1 swxY w Y   t          j
        |d|z  	          }||k    sJ t          j
        |d|z  
          }||k    sJ t          j
        |          }||k    sJ d S )Nzlargethrift.parquetr4   r  c                     g | ]}d | S )some_long_column_name_rj   )rt   rF  s     r    rv   z+test_thrift_size_limits.<locals>.<listcomp>L  s!    EEE+++EEEr/   r$   z1Couldn't deserialize thrift:.*Exceeded size limitr   r   )thrift_string_size_limit)thrift_container_size_limitr1   r   )r   r'   rn   ro   r   rT   r   r   r   r  rh   )r   rd   r'   num_colsr   gots         r    test_thrift_size_limitsr  E  s'   **DHT%))__%%EHH	(EEU8__EEEG G GE N5$	E
G 
G 
G D D 	dR(]CCCCD D D D D D D D D D D D D D D 
E
G 
G 
G B B 	dAAAAB B B B B B B B B B B B B B B
 -sX~
F
F
FC%<<<<
-!h,
G
G
GC%<<<<
-

C%<<<<<<s$   B==CC#DD
D
c                    | dz  }t          j        dg di          }t          j        ||d           t          j        |d          }||k    sJ t          |                                          }|d         |d         k    sJ |d         |d         c|d<   |d<   | d	z  }|                    |           t          j        |d
          }||k    sJ |t          j        dg di          k    sJ t          j	        t          d          5  t          j        |d          }ddd           n# 1 swxY w Y   t          j        |d
          }|                                }	|	|k    sJ |	t          j        dg di          k    sJ t          j        |d          }t          j	        t          d          5  |                                }ddd           dS # 1 swxY w Y   dS )zUCheck that checksum verification works for datasets created with
    pq.write_table()zcorrect.parquetr   r   r   r   r  Twrite_page_checksumpage_checksum_verification   $   zcorrupted.parquetFr   r   r   r  CRC checksum verificationr   N)r   r   rT   r   rh   	bytearray
read_byteswrite_bytesr   r   r  ri   r   )
r   original_path
table_origtable_checkbin_datacorrupted_pathtable_corruptrI   corrupted_pq_filetable_corrupt2s
             r    +test_page_checksum_verification_write_tabler  `  s   
 //M3-..JN:}$GGGG -$OOOK$$$$
 113344H B<8B<''''!)"x|HRL(2, 22Nx((( M.=BD D DM J&&&&BHc<<<%8999999 
w&A	B	B	B K KM.TJJJK K K K K K K K K K K K K K K
 ~BGI I I&++--NZ''''RXsLLL&9:::::: ~BFH H H 
w&A	B	B	B % %""$$% % % % % % % % % % % % % % % % % %s$   D33D7:D7?G!!G%(G%c                 2   t          j        dg di          }| dz  }t          j        ||d           t	          |                                          }t          |          dk    sJ |d         }t          j        |d          }||k    sJ t          |	                                          }|d	         |d
         k    sJ |d
         |d	         c|d	<   |d
<   | dz  }t          ||           ||j        z  }|                    |           t          j        |d          }	|	|k    sJ |	t          j        dg di          k    sJ t          j        t          d          5  t          j        |d          }
ddd           dS # 1 swxY w Y   dS )zXCheck that checksum verification works for datasets created with
    pq.write_to_datasetr   r  correct_dirTr  r   r   r  r  r  corrupted_dirFr  r  r   N)r   r   rT   write_to_datasetrn   iterdirlenrh   r  r  r   r   r  r   r   r  )r   r  original_dir_pathoriginal_file_path_listr  r  r  corrupted_dir_pathcorrupted_file_pathr  rI   s              r    test_checksum_write_to_datasetr    sA   
 3-..J  -/
),02 2 2 2
 ##4#<#<#>#>??&''1,,,,+A.M-$OOOK$$$$
 113344H B<8B<''''!)"x|HRL(2, !?2 2333,}/AA##H--- M"5=BD D DM J&&&&BHc<<<%8999999 
w&A	B	B	B P PM-$OOOP P P P P P P P P P P P P P P P P Ps   (FFF)Sr   collectionsr   r   rS  shutilr   decimalr   r   pyarrowr   r   pyarrow.testsr   pyarrow.tests.parquet.commonr   r	   r
   pyarrow.parquetparquetrT   r   r   ImportErrorpandasr   pandas.testingtestingr   pyarrow.tests.pandas_examplesr   r   numpyr   mark
pytestmarkr!   r.   r:   r>   rJ   r[   r_   re   rk   slowrp   r~   r   r   r   r   parametrizeLocalFileSystemr   r   r   r   r   r   r  r  r%  r+  r5  rL  rO  rW  r]  ra  rc  filterwarningsrl  r  r  r  r  r  r  r  datasetr  rj   r/   r    <module>r     sZ  $ 
			 # # # # # # 				                              ; ; ; ; ; ; ; ; ; ;      FFFFFFFFF   	BBBBBBBBB<<<<<<<   NB   	BBB
 [ 
. . .= = =    	; 	; 	; 3 3 3$ $ $ $ $ $ $$ $ $   * * *        8 ' ' '$A A A  
"


(   !@AA    BA  *3 3 3
& 
& 
&     #/ #/ #/L34 34 34l}/ }/ }/@*2 *2 *2Z2 2 2 ! ! !" / / /  ! ! !4! ! !2 2 2 D D D    =>>LMM!4 !4 NM ?>  !4H $$88%%99	+   *UDM::4 4 ;: 4(	  	  	 6 6 6$    &  67% 7% 7%t 1P 1P 1P 1P 1Ps5   A AAA2 2	A>=A>B BB