
    z-PhX                     
   d dl Z d dlZ	 d dlZn# e$ r dZY nw xY wd dlZd dlZd dlm	Z	m
Z
 d dlmZ d dlmZ 	 d dlmZ d dlmZmZmZ n# e$ r dZY nw xY w	 d dlZd dlmZ d dlmZmZ n# e$ r dxZZY nw xY wej        j        Zej        j        d             Zej        j        d             Z ej        j        d	             Z!ej        j        d
             Z"ej        j        d             Z#ej        j        d             Z$ej        j        d             Z%ej        j        d             Z&ej        j        d             Z'ej        j        d             Z(ej        j        d             Z)ej        j        d             Z*ej        j        d             Z+ej        j        d             Z,ej        j        d             Z-ej        j        d             Z.ej        j        d             Z/ej        j        d             Z0ej        j        d             Z1ej        j        d             Z2ej        j        d             Z3ej        j        d             Z4ej        j        d             Z5ej        j        d             Z6ej        j        ej        7                    dg d           ej        7                    d!d"d#g          d$                                     Z8ej        j        d%             Z9ej        j        d&             Z:dS )'    N)LocalFileSystemSubTreeFileSystem)guid)Version)_read_table_test_dataframe_write_table)_roundtrip_pandas_dataframealltypes_samplec                 p   t          d          }| dz  }t          j                            |          }d|j        j        v sJ t          ||           t          j        |          j        }d|v sJ t          j
        |d                             d                    }|d         dd ddd	d
gk    sJ d S )N'  sizepandas_roundtrip.parquets   pandasutf8index_columnsranger      )kindnamestartstopstep)r   paTablefrom_pandasschemametadatar	   pqread_metadatajsonloadsdecode)tempdirdffilenamearrow_tabler   jss         a/var/www/html/test/jupyter/venv/lib/python3.11/site-packages/pyarrow/tests/parquet/test_pandas.py#test_pandas_parquet_custom_metadatar*   7   s    	e	$	$	$B33H(&&r**K*33333h'''))2H    	HY'..v66	7	7BoG,0-.,-$/ $/ #0 0 0 0 0 0 0    c           	         t          j        t          j        dt          j                              t          j        dt          j                              t          j        dt          j                              g          }t          j        t          j	        dt          j
                  t          j	        dt          j                  g dd          }t          j        dd	gd
d gd d gd          }t           j                            ||d          }t           j                            ||d          }|j                            |j        d          rJ |j                            |j                  sJ t          j        | dz  |          }|                    |           |                    |           d S )Nintfloatstring   dtype)ABBAEDDAACDC)r-   r.   r/         g?F)r   preserve_indexT)check_metadatazmerged.parquet)r   )r   r   fieldint16float32r/   pd	DataFramenparangeuint8r   r   equalsr   ParquetWriterwrite_table)r$   r   df1df2table1table2writers          r)   :test_merging_parquet_tables_with_different_pandas_metadatarJ   K   s    Y


##
"*,,''
29;;''  F
 ,y"(+++1BJ///***   C
 ,1vt,   C
 X!!#fU!KKFX!!#fU!KKF}##FM$#GGGGG=.....g(88HHHF
v
vr+   c                    t          d          }t          j                            t	          t          |j        |j        d d d                             ddg          |_        | dz  }t          j        	                    |          }|j
        j        J t          ||           t          j        |          }|                                }t!          j        ||           d S )N
   r   level_1level_2namesr   )r   r=   
MultiIndexfrom_tupleslistzipcolumnsr   r   r   r   pandas_metadatar	   r   read_pandas	to_pandastmassert_frame_equal)r$   r%   r&   r'   
table_readdf_reads         r)   %test_pandas_parquet_column_multiindexr^   h   s    	b	!	!	!B**SRZ"-..//)$ +  BJ
 33H(&&r**K-999h'''))J""$$G"g&&&&&r+   c                    t          d          }| dz  }t          j                            |d          }|j        j        }|d         rJ |d         sJ t          ||           t          j        |          }|j        j        }|d         rJ |j        j	        }|j        j	        |k    sJ |
                                }t          j        ||           d S )Nr   r   r   Fr8   r   rV   )r   r   r   r   r   rW   r	   r   rX   r   rY   rZ   r[   )r$   r%   r&   r'   r(   r\   r    r]   s           r)   <test_pandas_parquet_2_roundtrip_read_pandas_no_index_writtenra   {   s    	e	$	$	$B33H(&&r%&@@K			+B/"""" i=h'''))J			*B/""""%.M&-7777""$$G"g&&&&&r+   c                  h   t          d          } t          j                            |           }t          j                    }t          ||d           |                                }t          j        |          }t          |          	                                }t          j        | |           d S )Nr   2.6versionr   r   r   r   BufferOutputStreamr	   getvalueBufferReaderr   rY   rZ   r[   r%   r'   imosbufreaderr]   s         r)   )test_pandas_parquet_native_file_roundtriprn      s    			B(&&r**K ""DdE2222
--//C_S!!F&!!++--G"g&&&&&r+   c                     t          d          } t          j                            |           }t          j                    }t          ||d           |                                }t          j        |          }t          j	        |ddg          
                                }t          j        | ddg         |           d S )Nr   rc   rd   stringsrA   rV   )r   r   r   r   rg   r	   rh   ri   r   rX   rY   rZ   r[   rj   s         r)   test_read_pandas_column_subsetrr      s    			B(&&r**K ""DdE2222
--//C_S!!FnG,  ikk  "i12G<<<<<r+   c                  h   t          d          } t          j                            |           }t          j                    }t          ||d           |                                }t          j        |          }t          |          	                                }t          j        | |           d S )Nr   rc   rd   rf   rj   s         r)   #test_pandas_parquet_empty_roundtriprt      s    			B(&&r**K ""DdE2222
--//C_S!!F&!!++--G"g&&&&&r+   c                      ddiddiddigdd} t          j        |           }t          j                            |          }t          j                    }t          ||           d S )	N	page_typer   record_typenon_consecutive_homer   1001)agg_col	uid_first)data)r=   r>   r   r   r   rg   r	   )r|   r%   r'   rk   s       r)   !test_pandas_can_write_nested_datar}      s     !A#Q'

  D 
4	 	 	 B(&&r**K ""Dd#####r+   c                    | dz  }d}t          j        t          j        |t          j                  t          j        |t          j                  t          j        |t          j                  t          j                            |          dk    g dd          }t          j
                            |          }|                    d          5 }t          ||d	           d d d            n# 1 swxY w Y   t          j        |                                          }t#          |          }|                                }t'          j        ||           d S )
Nzpandas_pyfile_roundtrip.parquetr7   r1   r   )foobarNbazqux)int64r<   float64boolrp   wbrc   rd   )r=   r>   r?   r@   r   r<   r   randomrandnr   r   r   openr	   ioBytesIO
read_bytesr   rY   rZ   r[   )	r$   r&   r   r%   r'   fr|   r\   r]   s	            r)   $test_pandas_parquet_pyfile_roundtripr      sl   ::HD	4rx0009T4449T444	%%)555  
 
B (&&r**K	t		 4[!U33334 4 4 4 4 4 4 4 4 4 4 4 4 4 4 :h))++,,DT""J""$$G"g&&&&&s   C22C69C6c                    d}t           j                            d           t          j        t          j        |t           j                  t          j        |t           j                  t          j        |t           j                  t          j        |t           j	                  t          j        |t           j
                  t          j        |t           j
                  t          j        |t           j                  t          j        |t           j                  t          j        |t           j                  t          j        |t           j                  t           j                            |          dk    d          }| dz  }t           j                            |          }dD ]M}t'          ||d|           t)          |          }|                                }t-          j        ||           NdD ]M}t'          ||d|	           t)          |          }|                                }t-          j        ||           Nd
D ]x}	|	dk    r%t           j        j                            |	          s-t'          ||d|	           t)          |          }|                                }t-          j        ||           yd S )Nr   r   r1   )rA   uint16uint32uint64int8r;   int32r   r<   r   r   r   )TFrc   )re   use_dictionary)re   write_statistics)NONESNAPPYGZIPLZ4ZSTDr   )re   compression)r?   r   seedr=   r>   r@   rA   r   r   r   r;   r   r   r<   r   r   r   r   r   r	   r   rY   rZ   r[   libCodecis_available)
r$   r   r%   r&   r'   r   r\   r]   r   r   s
             r)   )test_pandas_parquet_configuration_optionsr      s   DINN1	4rx000)D	222)D	222)D	222	$bh///4rx0004rx0004rx0009T4449T444	%%)  
 
B 33H(&&r**K' + +[(E$2	4 	4 	4 	4 **
&&((
b'****) + +[(E&6	8 	8 	8 	8 **
&&((
b'****@ + +6!!FL--k:: "[(E!,	. 	. 	. 	. **
&&((
b'****+ +r+   c                      t          d          } t          j        ddt          |           z  d          | _        d| j        _        t          | ddi          }t          j        ||            d S )Nd   r   r   rL   r   flavorspark)	r   r?   r@   lenindexr   r
   rZ   r[   )r%   results     r)   +test_spark_flavor_preserves_pandas_metadatar     sg    	c	"	"	"ByBRL"--BHBHM(h-@AAF&"%%%%%r+   c                 @   t          j        d          dt          j        d          dit          j        d          t          j        d          t          j        d          t          j        d          id}t          | dz            }t          j        |d                              dd	
          }t
          j                            |          }t          ||           t          |          }|
                                }t          j        ||           d S )Nz2017-06-30 01:31:00g*_c@z2017-06-30 01:32:00)closetimedata.parquetzdatetime64[us]r1   r   Fdrop)r=   	Timestampstrr>   	set_indexr   r   r   r	   r   rY   rZ   r[   )r$   r|   pathdfxtdfxr'   	result_dfs          r)    test_index_column_name_duplicater     s    L.//1CL.//1C

 L.//%2 2 L.//%2 2	
 D w'((D
 ,t#3
4
4
4
>
>vE
>
R
RC8$$Dtd##K%%''I)S)))))r+   c                    d}t          t          |                    }t          j                            g d|gddg          }t          j        d|i|          }t          j                            |          }| dz  }t          ||           t          |          }|                    |          sJ |                                }t          j        ||           d S )	Nr0   )r   r   r   foobarsome_numbersrP   numbers)r   zdup_multi_index_levels.parquet)rT   r   r=   rR   from_arraysr>   r   r   r   r	   r   rB   rY   rZ   r[   )	r$   num_rowsr   r   r%   tabler&   result_tabler   s	            r)    test_multiindex_duplicate_valuesr   8  s    H5??##GM%%			(( &  E
 
y'*%	8	8	8BH  $$E99H!!!x((L<<%%%%%&&((I)R(((((r+   c                     d}t          j        t          j        |          dd dd          }t	          | dz            }|                                }t          j        ||           d S )N  carat        cut  color  clarity  depth  table  price     x     y     z
 0.23      Ideal      E      SI2   61.5   55.0    326  3.95  3.98  2.43
 0.21    Premium      E      SI1   59.8   61.0    326  3.89  3.84  2.31
 0.23       Good      E      VS1   56.9   65.0    327  4.05  4.07  2.31
 0.29    Premium      I      VS2   62.4   58.0    334  4.20  4.23  2.63
 0.31       Good      J      SI2   63.3   58.0    335  4.34  4.35  2.75
 0.24  Very Good      J     VVS2   62.8   57.0    336  3.94  3.96  2.48
 0.24  Very Good      I     VVS1   62.3   57.0    336  3.95  3.98  2.47
 0.26  Very Good      H      SI1   61.9   55.0    337  4.07  4.11  2.53
 0.22       Fair      E      VS2   65.1   61.0    337  3.87  3.78  2.49
 0.23  Very Good      H      VS1   59.4   61.0    338  4.00  4.05  2.39\s{2,}r   pythonsep	index_colheaderenginezv0.7.1.parquet)r=   read_csvr   r   r   rY   rZ   r[   datadirexpected_stringexpectedr   r   s        r)   &test_backwards_compatible_index_namingr   N  su    KO {2:o66I%)!HF F FH"2233E__F&(+++++r+   c                     d}t          j        t          j        |          dg ddd                                          }t          | dz            }|                                }t          j        ||           d S )Nr   r   cutcolorclarityr   r   r   zv0.7.1.all-named-index.parquet)	r=   r   r   r   
sort_indexr   rY   rZ   r[   r   s        r)   1test_backwards_compatible_index_multi_level_namedr   c  s    KO {

?##---   jll	  "BBCCE__F&(+++++r+   c                 @   d}t          j        t          j        |          dg ddd                                          }|j                            g d          |_        t          | dz            }|                                }t          j
        ||           d S )	Nr   r   r   r   r   r   )r   Nr   zv0.7.1.some-named-index.parquet)r=   r   r   r   r   r   	set_namesr   rY   rZ   r[   r   s        r)   6test_backwards_compatible_index_multi_level_some_namedr   |  s    KO {

?##!<!<!<   jll	 
 ^--.F.F.FGGHN"CCDDE__F&(+++++r+   c           	         t          d          t          t          j                  k    rt          j        d           t          j        g dg dt          j        ddd          d	          }t          j                            g d	t          j        ddd          gd
d g          |_	        | dz  }t          |          }|                                }t          j        ||           t          |dg          }|                                }t          j        ||dg                             d                     d S )Nz2.2.0zRegression in pandas 2.2.0r      r0   )g?g?g333333?z
2017-01-01r0   zEurope/Brussels)periodstzabcr   rP   z'v0.7.1.column-metadata-handling.parquetr   rq   Tr   )r   r=   __version__pytestskipr>   
date_rangerR   r   r   r   rY   rZ   r[   reset_index)r   r   r   r   r   s        r)   2test_backwards_compatible_column_metadata_handlingr     sR   w72>2222 	0111|iilllmL!8IJJJ	L 	LM MH ]..		|Q3D	E	E	E	Go /  HN
 >>DE__F&(+++se  E__F&(C5/"="=4"="H"HIIIIIr+   c                  ,   t          j        ddgddggddg          } | d                             d          | d<   |                     dg          } t          j                            |           }t	          j                    }t          j	        ||           t          j
        |                                                                          }t          |j        t           j                  sJ |j                            | j                  sJ d S )	Nr   r   r   dc1c2rq   category)r=   r>   astyper   r   r   r   rg   r   rD   rX   rh   rY   
isinstancer   CategoricalIndexrB   )r%   r   bosref_dfs       r)   )test_categorical_index_survives_roundtripr     s     
SzC:.t	E	E	EB$xz**BtH	tf		BH  $$E


!
!CN5#^CLLNN++5577FflB$788888<rx(((((((r+   c                     t          j        dt          j        g dg dd          i          } t          j                            |           }t          j                    }t          j        ||           |	                                }t          j
        |                                          }t          j        ||            d S )Nr   )r   r   r   r   )r   r   r   T)
categoriesordered)r=   r>   Categoricalr   r   r   rg   r   rD   rh   rX   rY   rZ   r[   )r%   r   r   contentsr   s        r)   )test_categorical_order_survives_roundtripr     s     
sBN$H H H I 
J 
JB H  $$E


!
!CN5#||~~H^H%%//11F&"%%%%%r+   c                     t          j        d gdz  dgdz  d          } |                     ddd          }t          j                            |           }t          j                            |          }t          j                    }t          j        ||dd           t          j	        |
                                          }|d                             |d                   sJ |d	                             |d	                   sJ d S )
Nr   g      ?)colr-   r   rc   rL   )re   
chunk_sizer   r   )r=   r>   r   r   r   r   rg   r   rD   
read_tablerh   rB   )r%   df_categoryr   	table_catrl   r   s         r)   *test_pandas_categorical_na_type_row_groupsr     s     
tfslC53;??	@	@B))JzBBCCKH  $$E$$[11I


!
!C N9c5R@@@@]3<<>>**F !9E!H%%%%%!9E!H%%%%%%%r+   c                  ,   t          j        g dd          } g d}t          j        dt          j                            | |          i          }t          j                    }t          j	        t          j
        |          |           t          j        |                                                                          }|j        j        dk    sJ |j        j        j        |k                                    sJ t'          j        ||           d S )N)r   r   r   r   r   rM   r   r   r1   )r   r   r   x)r   r   )r?   arrayr=   r>   r   
from_codesr   rg   r   rD   r   r   rh   rY   r  r2   catr   allrZ   r[   )codesr   r%   rl   r   s        r)   !test_pandas_categorical_roundtripr    s    
 H+++7;;;E&&&J	sBN55* 6 & & ' 
( 
(B 

!
!CN28B<<%%%]3<<>>**4466F8>Z''''HL#z16688888&"%%%%%r+   c                    t          t          j                  t          d          k     rt          j        d           t          j        dg did          }|                    d          }t          j        dg di          }|                    d          }t          j        |d                   	                                t          j        |d                   	                                k    sJ t          j        |d         j
        j        j                  	                                t          j        |d         j
        j        j                  	                                k    sJ t          | dz            }t          j        t          j        |          |           t          j        |                                          }t'          j        ||           d S )	Nz1.3.0z:PyArrow backed string data type introduced in pandas 1.3.0r  )r   r   r   zstring[pyarrow]r1   r   zcat.parquet)r   r=   r   r   r   r>   r   r   r  	to_pylistr  r   valuesr   r   rD   r   r   rY   rZ   r[   )r$   rE   rF   r   r   s        r)   )test_categories_with_string_pyarrow_dtyper    s    r~!1!111PQQQ
,2223;L
M
M
MC
**Z
 
 C
,2223
4
4C
**Z
 
 C 8CH''))RXc#h-?-?-I-I-K-KKKKK8CHL+233==??28C&D( D((1	4 4 4 4 w&''DN28C==$''']4  **,,F&#&&&&&r+   c                    t          j        dg dd          }|d                             d          |d<   t          j        |          }t          j        |t          | dz            dg           t          j        t          | dz                      	                                }t          j        |dg         |dg                    t          j        |t          | d	z                       t          j        t          | d	z                      	                                }t          j        |dg         |dg                    t          j        |t          | d
z                       t          j        t          | d
z                      	                                }t          j        |dg         |dg                    d S )Nr   r   partr   r   Int64case1r  partition_colscase2r   )r=   r>   r   r   r   r   write_to_datasetr   r   rY   rZ   r[   rD   )r$   r%   r   r   s       r)   5test_write_to_dataset_pandas_preserve_extensiondtypesr    s   	s99955	6	6B5	  ))BuIHRLLEs7W$%%vh    ]3w01122<<>>F&%/2ug;777s7W#455666]3w01122<<>>F&%/2ug;777N5#g677888]3w78899CCEEF&%/2ug;77777r+   c                    t          j        g dg dd          }t          j        g dd          |_        t	          j        |          }|ddg                                         }|d                             d	          |d<   t          j	        |t          | d
z            dg           t          j        t          | d
z                                                      }t          j        ||           t          j	        |t          | dz                       t          j        t          | dz                                                      }t          j        ||           t          j        |t          | dz                       t          j        t          | dz                                                      }t          j        ||           d S )N)r   r   r   r   r  r   idxr   r   r  r   r  r  r  r   )r=   r>   Indexr   r   r   copyr   r   r  r   r   rY   rZ   r[   rD   )r$   r%   r   df_catr   s        r)   +test_write_to_dataset_pandas_preserve_indexr     s    
yyyAA	B	BBxe444BHHRLLE %%''FF^**:66F6Ns7W$%%vh    ]3w01122<<>>F&&)))s7W#455666]3w01122<<>>F&"%%%N5#g677888]3w78899CCEEF&"%%%%%r+   r8   )TFNmetadata_fname	_metadata_common_metadatac                    d}d}| t                      z  }|                                 g }g }g }t          |          D ]}	t          ||	          }
t	          j        t          j        |	|z  |	dz   |z  d          d          |
_        |d	                    |	          z  }t          j                            |
|	          }|                    d           }|j        j        J t!          ||           |                    |           |                    |
           |                    |           t          j                            |
|	          }t%          j        |j        ||z             t%          j        |          }d
dg|                                                              }t	          j        fd|D                       }|dur|
j        j        nd |j        _        t3          j        ||           d S )Nr7   )r   r   r   r1   r   r  z
{}.parquetr`   rA   rp   rq   c                      g | ]
}|         S  r#  ).0r  rV   s     r)   
<listcomp>z<test_dataset_read_pandas_common_metadata.<locals>.<listcomp>e  s    555!G*555r+   F)r   mkdirr   r   r=   r  r?   r@   r   formatr   r   r   replace_schema_metadatar   r   r	   appendr   write_metadataParquetDatasetrX   rY   concatr   rZ   r[   )r$   r8   r  nfilesr   dirpath	test_dataframespathsir%   r   r   table_for_metadatadatasetr   r   rV   s                    @r)   (test_dataset_read_pandas_common_metadatar5  9  s    FDGMMOOOIFE6]]  T***8Ia$hQ$g>>>W
 
 
 ,,Q///$$R$GG --d33|$,,,UD!!!bT --
> .   (/>1IJJJ((G	"G   11;;==Fy5555f55566H'u44$ N&(+++++r+   c                 ,   t          j        dg di          }| dz  }t          ||           t          j        dt          t          |           t                                          }|                    t          j
        |                    sJ d S )Nr   r   r   )
filesystem)r=   r>   r	   r   rX   r   r   r   rB   r   r   )r$   r%   r&   r   s       r)   %test_read_pandas_passthrough_keywordsr8  k  s     
sIII&	'	'B'HX^$S\\?3D3DEE  F =="&&&&&&&r+   c                 t   t          j        t          j        ddgddgg          t          j        ddg          d          }| dz  }t          j        t          j                    t          j                              }t          j        t          j        d	|          t          j        d
t          j                              g          }t          j        	                    ||          }t          ||           t          j        |                                          }t          j        ||           d S )N)id	something)value2else)r:  
something2)valueelse2r   r   )col1col2r   rA  rB  )r=   r>   Seriesr   map_r/   r   r:   r   r   r	   r   rX   rY   rZ   r[   )r$   r%   r&   udtr   r'   r   s          r)   test_read_pandas_map_fieldsrF  {  s    
	 "45!#56
   	5%.))  
 
B 'H
'")++ry{{
+
+CY--rx	/L/LMNNF(&&r622Kh'''^H%%//11F&"%%%%%r+   );r   r!   numpyr?   ImportErrorr   pyarrowr   
pyarrow.fsr   r   pyarrow.utilr   pyarrow.vendored.versionr   pyarrow.parquetparquetr   pyarrow.tests.parquet.commonr   r   r	   pandasr=   pandas.testingtestingrZ   r
   r   mark
pytestmarkr*   rJ   r^   ra   rn   rr   rt   r}   r   r   r   r   r   r   r   r   r   r   r   r   r  r  r  r  parametrizer5  r8  rF  r#  r+   r)   <module>rV     sm  $ 
			    	BBB      9 9 9 9 9 9 9 9       , , , , , ,      < < < < < < < < < < <   	BBB? ? ? ? ? ? ? ? ?   NB [ 
 0 0 0&   8 ' ' '$ ' ' '0 ' ' ' 
= 
= 
= ' ' ' $ $ $  ' ' '. )+ )+ )+X & & & * * *: ) ) )* , , ,( , , ,0 , , ,2 J J J0 ) ) ) & & & & & &" & & &$ ' ' '. 8 8 8( & & &0 )+>+>+>??)K9K+LMM,, ,, NM @? ,,^ ' ' ' & & & & &s/    A
 
AAA+ +	A76A7