
    z-Ph+                        d dl Z d dlZd dlZd dlZd dlZ	 d dlmZ d dl	m
Z
 n# e$ r dZY nw xY w	 d dlZd dlmZ d dl	mZ n# e$ r dxZZY nw xY wej        j        Zej        j        d             Zej        j        d             Zej        j        d             Zej        j        d             Zej        j        d             Zej        j        d	             Zd
 Zd Zej        j        ej                            dg d          d                         Zej        j        ej                            ddg          d                         Zej        j        ej                            dddg          d                         Zd Z ej        j!        ej                            dd          d                         Z"d Z#d Z$dS )    N)_write_table)alltypes_samplec                     t          d          } t          j                            |           }t	          j                    }t          ||dd           |                    d           t          j	        |          }|                    d           t          j
        ||          }t          j        | |                                                                           d S )N'  sizesnappy2.6)compressionversionr   )metadata)r   paTablefrom_pandasioBytesIOr   seekpqread_metadataParquetFiletmassert_frame_equalread	to_pandas)dfa_tablebufr   filehs        g/var/www/html/test/jupyter/venv/lib/python3.11/site-packages/pyarrow/tests/parquet/test_parquet_file.pytest_pass_separate_metadatar    .   s     
e	$	$	$Bh""2&&G
*,,C#8UCCCCHHQKKK$$HHHQKKKN3222E"ejjll446677777    c                     d\  } }t          |           }t          j                            |          }t	          j                    }t          ||| |z  dd           |                    d           t          j	        |          j
        |k    sJ fdt          |          D             }t          j        |          }t          j        ||                                           d S )Nr      r   r	   r
   row_group_sizer   r   r   c                 :    g | ]}                     |          S  read_row_group).0ipfs     r   
<listcomp>z.test_read_single_row_group.<locals>.<listcomp>T   s'    9991"##A&&999r!   )r   r   r   r   r   r   r   r   r   r   num_row_groupsrangeconcat_tablesr   r   r   )NKr   r   r   
row_groupsresultr-   s          @r   test_read_single_row_groupr6   B   s     DAq	a	 	 	 Bh""2&&G
*,,C#a!e%u6 6 6 6 HHQKKK			B!!!!9999a999Jj))F"f..0011111r!   c                     d\  } }t          |           }t          j                            |          }t	          j                    }t          ||| |z  dd           |                    d           t          j	        |          t          |j        d d                   fdt          |          D             }t          j        |          }t          j        |         |                                           fd	t          |          D             }t          j        |          }t          j        |         |                                           d S )
Nr#   r   r	   r
   r%   r      c                 >    g | ]}                     |           S columnsr)   r+   r,   colsr-   s     r   r.   zAtest_read_single_row_group_with_column_subset.<locals>.<listcomp>g   s,    GGG"##At#44GGGr!   c                 D    g | ]}                     |z              S r:   r)   r=   s     r   r.   zAtest_read_single_row_group_with_column_subset.<locals>.<listcomp>m   s0    NNN"##Atd{#;;NNNr!   )r   r   r   r   r   r   r   r   r   r   listr<   r0   r1   r   r   r   )	r2   r3   r   r   r   r4   r5   r>   r-   s	          @@r   -test_read_single_row_group_with_column_subsetrA   Y   sM   DAq	a	 	 	 Bh""2&&G
*,,C#a!e%u6 6 6 6 HHQKKK			B
2A2DGGGGGeAhhGGGJj))F"T(F$4$4$6$6777 ONNNNU1XXNNNJj))F"T(F$4$4$6$677777r!   c                     d\  } }t          |           }t          j                            |          }t	          j                    }t          ||| |z  dd           |                    d           t          j	        |          }|j
        |k    sJ |                    t          |                    }t          j        ||                                           d S )Nr#   r   r	   r
   r%   r   )r   r   r   r   r   r   r   r   r   r   r/   read_row_groupsr0   r   r   r   )r2   r3   r   r   r   r-   r5   s          r   test_read_multiple_row_groupsrD   r   s    DAq	a	 	 	 Bh""2&&G
*,,C#a!e%u6 6 6 6 HHQKKK			B!!!!a))F"f..0011111r!   c                     d\  } }t          |           }t          j                            |          }t	          j                    }t          ||| |z  dd           |                    d           t          j	        |          }t          |j        d d                   }|                    t          |          |          }t          j        ||         |                                           |                    t          |          ||z             }t          j        ||         |                                           d S )	Nr#   r   r	   r
   r%   r   r8   r;   )r   r   r   r   r   r   r   r   r   r   r@   r<   rC   r0   r   r   r   )r2   r3   r   r   r   r-   r>   r5   s           r   0test_read_multiple_row_groups_with_column_subsetrF      s&   DAq	a	 	 	 Bh""2&&G
*,,C#a!e%u6 6 6 6 HHQKKK			B
2A2Da$77F"T(F$4$4$6$6777 a$+>>F"T(F$4$4$6$677777r!   c                     d\  } }t          |           }t          j                            |          }t	          j                    }t          ||| |z  dd           |                    d           t          j	        |          }|
                                dk    sJ |
                    |j        d d                   dk    sJ d S )	Nr#   r   r	   r
   r%   r   r   r$   )r   r   r   r   r   r   r   r   r   r   scan_contentsr<   )r2   r3   r   r   r   r-   s         r   test_scan_contentsrI      s    DAq	a	 	 	 Bh""2&&G
*,,C#a!e%u6 6 6 6 HHQKKK			B&&&&BJrrN++u444444r!   c                    | dz  }t          j        t          |                     dt          |           d}t          j        t
                    5 }t          j        |           d d d            n# 1 swxY w Y   |                    t                    rt          j        dk    rd S |                    |           d S )N	directoryzCannot open for reading: path 'z' is a directorywin32)osmkdirstrpytestraisesIOErrorr   r   errisinstancePermissionErrorsysplatformmatch)tempdirpathmsgexcs       r   0test_parquet_file_pass_directory_instead_of_filer\      s    [ DHSYY
GCII
G
G
GC	w		 3
t              
)) clg.E.EIIcNNNNNs   A44A8;A8c                     t          j        t          j        ddg          t          j        ddg          gddg          } t          j                    }t	          j        | |           t	          j        |                                          }|j        	                    d          
                                ddgk    sJ |j        	                    d	          
                                ddgk    sJ d
D ]T}t          j        t          t          f          5  |j        	                    |           d d d            n# 1 swxY w Y   Ud S )Nr$      foobarintsstrs)namesr      )r8   )r   tablearrayBufferOutputStreamr   write_tabler   getvaluereaderread_column	to_pylistrP   rQ   
ValueError
IndexError)rf   biofindexs       r   test_read_column_invalid_indexrs      sv   Hbh1v&&%(@(@A"F+- - -E


!
!CN5#
s||~~&&A8"",,..1a&88888"",,..5%.@@@@ ( (]J
344 	( 	(H  '''	( 	( 	( 	( 	( 	( 	( 	( 	( 	( 	( 	( 	( 	( 	(( (s   EE	E	
batch_size)i,    i  c           	      d   d}d}t          |          }| dz  }t          j                            |          }t	          ||d|           t          j        |          }|j        d d         |j        dd          fD ]}|                    ||          }	t          d	||z   |          }
t          |	|
          D ]s\  }}t          |||z             }t          j        |                                |j        ||d d f         j        d d |f                             d
                     td S )Ni  ru   r   pandas_roundtrip.parquetr
   r   
chunk_size
   )rt   r<   r   Tdrop)r   r   r   r   r   r   r   r<   iter_batchesr0   zipminr   r   r   iloclocreset_index)rX   rt   
total_sizery   r   filenamearrow_tablefile_r<   batchesbatch_startsbatchstartends                 r    test_iter_batches_columns_readerr      sc    JJ	j	)	)	)B33H(&&r**Kh&( ( ( ( N8$$EJssORZ_5  $$
G$LLQ
: 5zBB66 	 	LE5j%*"455C!!!c	111%)!!!W*5AAtALL   	 r!   ry   ru   c                 2   t          dd          }| dz  }t          j                            |          }|j        j        J t          ||d|           t          j        |          }d }t           ||                    }d}t          |j                  D ]}	t          j        ||                                         |                    |	g                                                              d	                     |d
z  }t          j        ||                                                             d          |                    |	g                                          j        d	d                              d                     |d
z  }d S )Nr   T)r   categoricalrw   r
   rx   c              3   z   K   t          | j                  D ]#}|                     d|g          }|D ]}|V  $d S )N  )rt   r4   )r0   r/   r}   )rq   	row_groupr   r   s       r   get_all_batchesz1test_iter_batches_reader.<locals>.get_all_batches   sk      q/00 	 	Inn%; %  G
 !  	 	r!   r   r   rd   r{   )r   r   r   r   schemapandas_metadatar   r   r   r@   r0   r/   r   r   r   rC   headr   r   )
rX   ry   r   r   r   r   r   r   batch_nor,   s
             r   test_iter_batches_readerr      s    
e	6	6	6B33H(&&r**K-999h&( ( ( ( N8$$E   ??5))**GH5'((  
H''))!!1#&&002277<<	
 	
 	

 	A
H''))5545@@!!1#&&00227=II J  	
 	
 	
 	A r!   
pre_bufferFTc                 V   d\  }}t          |          }t          j                            |          }t	          j                    }t          ||||z  dd           |                    d           t          j	        ||           }|
                                j        |k    sJ d S )Nr#   r   r	   r
   r%   r   )r   )r   r   r   r   r   r   r   r   r   r   r   num_rows)r   r2   r3   r   r   r   r-   s          r   test_pre_bufferr     s     DAq	a	 	 	 Bh""2&&G
*,,C#a!e%u6 6 6 6 HHQKKK	
	3	3	3B7799""""""r!   c                 v   |                      d          }t          j        ddgddgd          }t          j        ||           t          |d          5 }t          j        |          5 }|                                 |j        rJ |j        rJ 	 ddd           n# 1 swxY w Y   |j        rJ |j        rJ 	 ddd           n# 1 swxY w Y   |j        sJ |j        sJ t          j        |          5 }|                                 |j        rJ 	 ddd           n# 1 swxY w Y   |j        sJ dS )z
    Unopened files should be closed explicitly after use,
    and previously opened files should be left open.
    Applies to read_table, ParquetDataset, and ParquetFile
    zfile.parquetr   rd   )col1col2rbN)	joinpathr   rf   r   ri   openr   r   closed)rX   fnrf   rq   ps        r   #test_parquet_file_explicitly_closedr     s    
		.	)	)BHq!fq!f5566EN5" 
b$ 1^A 	 !FFHHHxx	  	  	  	  	  	  	  	  	  	  	  	  	  	  	  88               8OOO8OOO 
		 q	8               8OOOOOsH   C	+'BC	B#	#C	&B#	'C		CC:D%%D),D)use_uri)TFc                 
   | \  }}}|r|n|f}|ri nt          |          }t          j        dt          d          i          }t	          j        |||           t	          j        |i |}|                                |k    sJ |j        rJ |	                                 |j        sJ t	          j        |i |5 }	|	                                |k    sJ |	j        rJ 	 d d d            n# 1 swxY w Y   |	j        sJ d S )N)
filesystemarz   )
dictr   rf   r0   r   ri   r   r   r   close)
s3_example_fsr   s3_fss3_uris3_pathargskwargsrf   parquet_filerq   s
             r   !test_parquet_file_with_filesystemr   <  sn    +E67*FF7,D6RR 6 6 6FHc599%&&EN5'e4444>42622L%''''""""		(	(	( Avvxx5    8               8OOOOOs   >$C//C36C3c                     t          j        dt          j        g d          i          } t          j                    }t          | |           |                    d           t          j        |          	                                j
        d         j        d         j        }|j        dk    sJ |j        J |j        dk    sJ |j        sJ |j        dk    sJ |j        sJ t'          |          dk    sJ d S )Nvalue)re   N   r   rd   re   r   zmarrow.ArrayStatistics<null_count=1, distinct_count=None, min=-1, is_min_exact=True, max=3, is_max_exact=True>)r   rf   rg   r   r   r   r   r   r   r   r<   chunks
statistics
null_countdistinct_countr   is_min_exactmaxis_max_exactrepr)rf   r   r   s      r   test_read_statisticsr   S  s   Hgrx66788E
*,,CHHQKKK$$))++3A6=a@KJ A%%%%$,,,>R"""">Q""""
 !< = = = = = =r!   c                     |  d}t          j        |                                          }|j        ddgk    sJ |d                                         g dk    sJ d S )Nz/unknown-logical-type.parquetzcolumn with known typezcolumn with unknown type)s   unknown string 1s   unknown string 2s   unknown string 3)r   r   r   column_namesrm   )parquet_test_datadir	test_filerf   s      r    test_read_undefined_logical_typer   f  s    'FFFIN9%%**,,E":<V!WWWWW+,6688 = = =      r!   )%r   rM   rU   rP   pyarrowr   pyarrow.parquetparquetr   pyarrow.tests.parquet.commonr   ImportErrorpandaspdpandas.testingtestingr   r   mark
pytestmarkr    r6   rA   rD   rF   rI   r\   rs   parametrizer   r   r   r   s3r   r   r   r(   r!   r   <module>r      s  $ 
			 				 



           9999999   	BBB<<<<<<<   NB [ 
 8 8 8& 2 2 2, 8 8 80 2 2 2( 8 8 8, 5 5 5 
 
 

( 
( 
( '8'8'899  :9 . v..( ( /. (V t}55# # 65 #  : M22  32 *= = =&	 	 	 	 	s   # --A 	AA