
    z-Ph                     V   d dl Z d dlZd dlZd dlZd dlZ	 d dlZn# e$ r dZY nw xY wd dlZd dl	m
Z
 d dlZd dlmZ d dlmZmZmZmZmZmZ d dlmZ d dlmZ 	 d dlmZ d dlmZm Z m!Z! n# e$ r dZY nw xY w	 d dl"Z#d dl$m%Z& n# e$ r dxZ#Z&Y nw xY wej'        j        ej'        j(        gZ)d Z*ej'        j"        d             Z+ej'        j"        d             Z,ej'        j"        d	             Z-ej'        j"        d
             Z.ej'        /                    e0e1fd          ej'        j"        d                         Z2ej'        j"        d             Z3ej'        j"        d             Z4ej'        j"        d             Z5ej'        j"        d             Z6ej'        j"        d             Z7ej'        j"        ej'        8                    ddgdgg ej9        d          dk      ej9        dd          dk      ej9        dd          :                     ej;                              dk     f          ej'        8                    dd          d                                     Z<ej'        j"        d             Z=ej'        j>        d             Z?ej'        j>        d             Z@ej'        j"        d              ZAej'        j"        ej'        j>        d!                         ZBd" ZCd# ZDd$ ZEej'        j"        d%             ZFej'        j"        d&             ZGej'        j"        d'             ZHej'        j"        d(             ZIej'        j"        d)             ZJej'        j"        d*             ZKdUd-ZLd. ZMej'        j"        ej'        8                    d/d0d1g          d2                         ZNej'        j"        d3             ZOej'        j"        d4             ZPej'        j"        ej'        8                    d/d0d1g          d5                         ZQd6 ZRd7 ZS	 	 	 dVd8ZT	 dWd9ZUej'        j"        d:             ZVej'        j"        d;             ZWej'        j"        d<             ZXej'        j"        d=             ZYej'        j"        d>             ZZej'        j"        ej'        j>        d?                         Z[ej'        j"        ej'        j>        ej'        \                    ej]        d@k    dAB          dC                                     Z^ej'        j"        ej'        j>        dD                         Z_ej'        j"        dE             Z`dXdGZaej'        j"        dH             Zbej'        j"        dI             ZcdJ ZddK ZedL ZfdM ZgdN ZhdO ZidP Zjej'        8                    dQdR          dS             Zkej'        j"        dT             ZldS )Y    N)FileSelector
FileSystemLocalFileSystemPyFileSystemSubTreeFileSystemFSSpecHandler)util)guid)_read_table_test_dataframe_write_tablec                    t          j        dg di          }| dz  }|                                 |dz  }t          j        |t          |                     t          j        |t                                }|                    |          sJ t          j        dt          j
        |                     }|                    |          sJ d S )Na         data_dirdata.parquet
filesystemzdata_dir/data.parquet)patablemkdirpqwrite_tablestr
read_tabler   equalsr	   _filesystem_uri)tempdirr   	directorypathresults        b/var/www/html/test/jupyter/venv/lib/python3.11/site-packages/pyarrow/tests/parquet/test_dataset.pytest_filesystem_urir&   ;   s    Hc999%&&E*$IOO~%DN5#d))$$$ ]**, , ,F== ]D,@,I,IK K KF==    c                 B    t                      }t          ||            d S N)r   _partition_test_for_filesystem)r!   locals     r%   test_read_partitioned_directoryr,   N   s#    E"5'22222r'   c                     t                      }| }t          ||           t          j        |          }|                    dg          }|j        dgk    sJ d S )Nvaluescolumns)r   r*   r   ParquetDatasetreadcolumn_names)r!   r+   	base_pathdatasetr$   s        r%   'test_read_partitioned_columns_selectionr6   T   se     EI"5)444	**G\\8*\--F8*,,,,,,r'   c                    t                      }| }ddg}g d}ddg}d|gd|gd|gg}t          j        t          j        |d	
                              d          t          j        t          j        t          j        |t          
          d          d          t          j        t          j        t          j        |d
          d          d          t          j        d          d          }t          ||||           t          j        ||g d          }|                                }	|	                                                    d          }
d|
d         j        vsJ d|
d         j        vsJ d|
d         j        vsJ g dddgg}t          j        |||          }|                                }	|	                                                    d          }
|
d         dk    |
d         dk    z  |
d         dk    z  }t          j        |
d                   dk    |
d         dk    z  }|                                dk    sJ |                                dk    sJ |
j        d         |                                |                                z   k    sJ dggdggfD ]8}t          j        |||          }|                                j        dk    sJ 9d S )Nr   r   r   bcTFintegerstringbooleani4dtype      r   boolr      r;   r<   r=   r.   ))r;   =r   )r<   !=r9   )r=   ==Truer   filtersdropr9   )r;   rF   r   )r=   rH   FalserI   rN   )r<   rH   s   1 a)r<   rH   z1 a)r   pd	DataFramenparrayrepeattileobjectarange_generate_partition_directoriesr   r1   r2   	to_pandasreset_indexr.   sumshapenum_rows)r!   r+   r4   integer_keysstring_keysboolean_keyspartition_specdfr5   r   	result_dfrK   df_filter_1df_filter_2s                 r%   test_filters_equivalencyre   a   sF   EIq6L!//K%=L	L!	;	L!N 
8L555<<R@@'"'"(;f"E"E"EqII1MM72728L#G#G#GKKQOO)B--	  
 
B $E9nbIII e, , ,  G
 LLNNE""..D.99IIi(/////i)00000	),33333	
 	
 	

 
89G eW6 6 6GLLNNE!!--4-88I Y'1,X#%'Y6)+K 8Ii011Q6Y7*,K??q    ??q    ?1+//"3"3koo6G6G"GHHHH/01./02 , ,#%: : :||~~&!+++++	, ,r'   c                    t                      }| }g d}d|gg}d}t          j        t          j        |          t          j        |d          dddg          }t          ||||           t          j        ||d	d
g          }|	                                }|
                                                    d                              d          }	d t          t          |	d         j                  D             }
|
ddgk    sJ d S )Nr   r   r   r      integersrB   r>   r?   indexri   rk   r/   )ri   <rh   )ri   >r   rJ   byTrL   c                     g | ]}|S  rq   .0xs     r%   
<listcomp>z9test_filters_cutoff_exclusive_integer.<locals>.<listcomp>   s    EEE1EEEr'   r   r   r   rO   rP   rQ   rV   rR   rW   r   r1   r2   rX   sort_valuesrY   mapintr.   r!   r+   r4   r]   r`   Nra   r5   r   rb   result_lists              r%   %test_filters_cutoff_exclusive_integerr}      s7   EI"??L	\"N 	
A	1H\666  $
& 
& 
&B
 $E9nbIIIe  
  G LLNNE"""{g{.."{{--  FEc#y'<'CDDEEEK1a&      r'   z5Loss of type information in creation of categoricals.)raisesreasonc           	      N   t                      }| }t          j        ddd          t          j        ddd          t          j        ddd          t          j        ddd          t          j        ddd          g}d|gg}d	}t          j        t          j        |          t          j        |d
          dddg          }t          ||||           t          j
        ||ddg          }|                                }|                                                    d                              d          }	t          j        t          j        t          j        ddd          gd
          t          j        |d
                    }
|	d         j        |
k    sJ d S )Ni  rh   	   
            datesrB   
datetime64r?   )rk   r   rk   r/   )r   rl   z
2018-04-12)r   rm   z
2018-04-10rJ   rn   TrL   
categories)r   datetimedaterO   rP   rQ   rV   rR   rW   r   r1   r2   rX   rw   rY   Categoricalr.   )r!   r+   r4   	date_keysr`   r{   ra   r5   r   rb   expecteds              r%   &test_filters_cutoff_exclusive_datetimer      s    EI 	dAq!!dAr""dAr""dAr""dAr""I 
)N 	
A	1)<888  !
# 
# 
#B
 $E9nbIIIe((
  G LLNNE"""{g{.."{{--  ~
(-a,,-\BBB8I\:::< < <H W$000000r'   c           
      f   | dz  }t          j        t          j        ddd          t          d          d                              |d           t          j        |d	d
t          j        ddd          fg          }|                    d          	                                g dk    sJ d S )Nztimestamps.parquetz
2020-01-01r   D)periodsfreq)r   idT)use_deprecated_int96_timestampsr   <=i  r   rB   rK   r   rg   )
rO   rP   
date_rangerange
to_parquetr   r   r   column	to_pylist)r!   r#   r   s      r%   test_filters_inclusive_datetimer      s     ))DL|RcBBBBii    z$z===M$	$)$1556)   E <<''))___<<<<<<r'   c                    t                      }| }g d}d|gg}d}t          j        t          j        |          t          j        |d          dddg          }t          ||||           t          j        ||d	d
g          }|	                                }|
                                                    d                              d          }	d t          t          |	d         j                  D             }
|
ddgk    sJ d S )Nrg   ri   rB   r>   r?   rj   rk   r/   )ri   r   r   )ri   z>=r   rJ   rn   TrL   c                 ,    g | ]}t          |          S rq   )ry   rr   s     r%   ru   z2test_filters_inclusive_integer.<locals>.<listcomp>*  s    JJJa3q66JJJr'   r   r   rv   rz   s              r%   test_filters_inclusive_integerr     s7   EI"??L	\"N 	
A	1H\666  $
& 
& 
&B
 $E9nbIIIe!!
  G LLNNE""++))+4+((  KJ3sIj,A,H#I#IJJJK1a&      r'   c                 >   t                      }| }ddg}g d}ddg}d|gd|gd|gg}t          j        t          j        |d	
                              d          t          j        t          j        t          j        |t          
          d          d          t          j        t          j        t          j        |d
          d          d          t          j        d          d          }t          ||||           t          j        ||dg          }|                                }	|	                                                    d          }
d|
d         j        v sJ d|
d         j        v sJ d|
d         j        vsJ t          j        ||dddgfddddhfg          }|                                }	|	                                                    d          }
d|
d         j        vsJ d|
d         j        vsJ d|
d         j        vsJ d S )Nr   r   r8   TFr;   r<   r=   r>   r?   rA   rB   r   rC   r   rD   rE   )r<   inabrJ   rL   r   r9   r:   r   )r<   r   r   r9   znot inrN   )r   rO   rP   rQ   rR   rS   rT   rU   rV   rW   r   r1   r2   rX   rY   r.   )r!   r+   r4   r]   r^   r_   r`   ra   r5   r   rb   s              r%   test_filters_inclusive_setr   .  sV   EIq6L!//K%=L	L!	;	L!N 
8L555<<R@@'"'"(;f"E"E"EqII1MM72728L#G#G#GKKQOO)B--	  
 
B $E9nbIIIe'(  G LLNNE""..D.99I)H%,,,,,)H%,,,,,i)00000eTA3')EXy13  G
 LLNNE""..D.99IIi(/////i)00000	),3333333r'   c                 n   t                      }| }g d}d|gg}d}t          j        t          j        |          t          j        |d          dddg          }t          ||||           t          j        t                    5  t          j        ||d	g
           d d d            n# 1 swxY w Y   t          j        t                    5  t          j        ||dg
           d d d            n# 1 swxY w Y   t          j        ||ddt                      fg
          }|                                j        dk    sJ t          j        ||dddhfg
          }t          j        t                     5  |                                j        dk    sJ 	 d d d            d S # 1 swxY w Y   d S )Nrg   ri   rB   r>   r?   rj   rk   r/   )ri   r   r   rJ   )ri   z=<r   r   r   rG   r   )r   rO   rP   rQ   rV   rR   rW   pytestr~   	TypeErrorr   r1   
ValueErrorsetr2   r\   NotImplementedError)r!   r+   r4   r]   r`   r{   ra   r5   s           r%   test_filters_invalid_pred_opr   ]  s   EI"??L	\"N 	
A	1H\666  $
& 
& 
&B
 $E9nbIII	y	!	! = =
)%*#8";	= 	= 	= 	== = = = = = = = = = = = = = =
 
z	"	" = =
)%*#8";	= 	= 	= 	== = = = = = = = = = = = = = = 	+0*4dCEE)B(EG G GG <<>>"a''''	+0*4dQC)@(AC C CG 
*	+	+ , ,||~~&!+++++, , , , , , , , , , , , , , , , , ,s6   B--B14B1C66C:=C:< F**F.1F.c                    t                      }| }g d}d|gg}d}t          j        t          j        |          t          j        |d          dddg          }t          ||||           d	}t          j        t          |
          5  t          j        ||dg                                           d d d            d S # 1 swxY w Y   d S )Nrg   ri   rB   r>   r?   rj   rk   r/   z1No match for FieldRef.Name\(non_existent_column\)match)non_existent_columnrl   r   rJ   )r   rO   rP   rQ   rV   rR   rW   r   r~   r   r   r1   r2   )r!   r+   r4   r]   r`   r{   ra   msgs           r%   test_filters_invalid_columnr     sK    EI"??L!<01N	A	1H\666  $
& 
& 
&B
 $E9nbIII
>C	z	-	-	- N N
)#B"E	G 	G 	GGKtvvvN N N N N N N N N N N N N N N N N Ns   +CCCrK   )ri   rl   r   ri   r   nestedr   r9   read_method)r   read_pandasc           
         t          t          |          }t                      }| }g d}d|gg}t          |          }t	          j        t          j        |          t          j        |d          t          j        d t          |          D                       d          }	t          ||||	           t          ||          }
 ||fi |
}|j        dk    sJ d S )	Nrg   ri   r>   r?   c                 2    g | ]}|t          |          d S )r   )r   rs   is     r%   ru   z+test_filters_read_table.<locals>.<listcomp>  s&    DDDa!#a&&11DDDr'   )rk   ri   r   rJ   r   )getattrr   r   lenrO   rP   rQ   rV   rR   r   rW   dictr\   )r!   rK   r   r2   r+   r4   r]   r`   r{   ra   kwargsr   s               r%   test_filters_read_tabler     s     2{##DEI"??L	\"N 	LA	1H\666(DD588DDDEE  
 
B $E9nbIIIUG444FD%%f%%E>Qr'   c                    t                      }| }ddg}d|gg}d}t          j        t          j        |          t          j        |d          dddg	          }t          ||||           t          j        |          }|	                                }|
                    d                                          |k    sJ d S )
N2019_22019_3	year_weekr   rU   r?   )rk   r   rk   r/   )r   rO   rP   rQ   rV   rR   rW   r   r1   r2   r   r   )	r!   r+   r4   r^   r`   r{   ra   r5   r$   s	            r%   $test_partition_keys_with_underscoresr     s     EIX&K	k"N 	
A	1Xk:::  %
' 
' 
'B
 $E9nbIII	**G\\^^F==%%//11[@@@@@@r'   c                     | \  }}|dz   }t          j        dg di          }t          |||           t          ||          }|                    |          sJ d S Nz/test.parquetr   r   r   r   r   r   r   r   )s3_example_s3fsfsr#   r   r$   s        r%   test_read_s3fsr     sr    HB/!DHc999%&&E,,,,"---F==r'   c                     | \  }}|dz   }t          j        dg di          }t          |||           t          ||          }|                    |          sJ d S r   r   )r   r   r"   r#   r   r$   s         r%   test_read_directory_s3fsr     sr    #MB	&DHc999%&&E,,,,r222F==r'   c                     t          | dz            }t          j        dg di          }t          ||           t	          j        |g                                          }|                    |          sJ d S )Nr   r   r   )r   r   r   r   r   r1   r2   r   )r!   	data_pathr   r$   s       r%   test_read_single_file_listr     sx    Gn,--IHc999%&&E	"""	{++0022F==r'   c                 0    | \  }}t          ||           d S r)   )r*   r   r   r#   s      r%   $test_read_partitioned_directory_s3fsr     s#     HB"2t,,,,,r'   c                    ddg}g d}d|gd|gg}d}t          j        t          j        |          t          j        |d                              d	          t          j        t          j        t          j        |t                    d
          d          t          j        	                    |          dg d          }t          | |||           t          j        ||           }|                                }|                                                    d                              d          }	|                    d                              d                              |	j                  }
|
d                             d          |
d<   |
d                             d          |
d<   |	j        g dk                                    sJ t+          j        |	|
           d S )Nr   r   r8   foobarrD   r>   r?   rA   rB   r   )rk   r   r   r.   r/   r   rk   rn   TrL   category)rk   r.   r   r   )rO   rP   rQ   rV   rR   rS   rT   rU   randomrandnrW   r   r1   r2   rX   rw   rY   reindexr0   astypealltmassert_frame_equal)r   r4   foo_keysbar_keysr`   r{   ra   r5   r   rb   expected_dfs              r%   r*   r*      s   1vHH		N 	A	1x---44R88wrwrx???CCQGG)//!$$	 
 100
2 
2 
2B $B	>2FFF	b999GLLNNE""++))+4+((  >>W>--KTK**GI$5G66  %U+22:>>K$U+22:>>K!B!B!BBGGIIIII)[11111r'   c           	           t           t                    st          t                                t	                    t           dt           dd                     fd |dg            d S )Npathsepsep/c                    |         \  }}|D ]}|||fgz   }                     t          |           d                    ||          g          }                    |           |dz
  k    r&ddlm}                      |t                      g          }	t          |          }
t          j	        
                    |
          }                    |	          5 }t          ||           d d d            n# 1 swxY w Y                       |	          j        |j        k    sJ                     |	          j        |j        k    sJ                      |dg          }                    |          5 }	 d d d            n# 1 swxY w Y    ||dz   |                                |dg          }                    |          5 }	 d d d            n# 1 swxY w Y   d S )Nz{}={}r   r   )FileType_SUCCESS)joinr   format
create_dir
pyarrow.fsr   r
   _filter_partitionr   Tablefrom_pandasopen_output_streamr   get_file_infotypeNotFoundFile)base_dirlevel	part_keysnamer.   valuethis_part_keys	level_dirr   	file_pathfiltered_df
part_tableffile_successDEPTH_visit_levelra   r   r`   r   s                 r%   r   z5_generate_partition_directories.<locals>._visit_level2  s   %e,f 	 	E&4-8NHtU++&  I MM)$$$	!!//////#LL)TVV)<==	/NCCX11+>>
**955 0 Q///0 0 0 0 0 0 0 0 0 0 0 0 0 0 0''	2278;LLLLL''	2278=HHHH&||Y
,CDD**<88 A               Y	>BBB&||Y
,CDD**<88 A              7	 	s6   C;;C?	C?	<F

F	F	G!!G%	(G%	r   )
isinstancer   r   r   r   r   )r   r   r`   ra   r   r   r   s   ` ``@@@r%   rW   rW   '  s     b*%% --++,,Eb)WR%<%<==G         @ L1b!!!!!r'   c                 \   t          j        t          |           t                    }g }|D ]c\  }}|                    |           t          |t          j        t          j        f          rt          j	        |          }|| |         |k    z  }d| |         
                    |d          S )Nr?   r   )axis)rQ   onesr   rC   appendr  r   r   rO   	TimestamprM   )ra   r   	predicateto_dropr   r   s         r%   r   r   U  s    Bt,,,IG  ' 'et ehmX->?@@ 	(L''ERX&&		i=gA...r'   c                 L   | dz  }|                                  t          j                            t	          j        dg di                    }t          j        ||dz             | dz  }|                                  t          j                            t	          j        dg di                    }t          j        ||dz             t          j        | dgg          }|	                    d          
                    t          j        g dg                    sJ d S )	NzA=0Br   r   zA=1r8   )ArH   r   r   )r   r   r   r   rO   rP   r   r   r   r   r   chunked_array)r!   dir1table1dir2table2r   s         r%   "test_filter_before_validate_schemar  e  s    U?DJJLLLX!!",YYY/?"@"@AAFN64.0111U?DJJLLLX!!",___/E"F"FGGFN64.0111 M'^,<+=>>>E<<##B$4iii[$A$ABBBBBBBr'   c                    d}d}| t                      z  }|                                 g }g }t          |          D ]}t          ||          }|d                             t
          j                  |d<   |d                    |          z  }t          j	        
                    |          }	t          |	|           |                    |	           |                    |           |dz                                   dd}
 |
|          t          j        |          }                    |          sJ d	d
dj        dz
  g}fd|D             }t#          j        ||          }t          j	                            fd|D             |j        j                  }|                    |          sJ t#          j        |d           t          ||          j        d d d df         }| d                    t                                z  }t          j	        
                    |          }t          ||           d S )Nr   rB   seeduint32
{}.parquetz_SUCCESS.crcTc                 T    t          j        | fi |}|                    ||          S )N)r0   use_threads)r   r1   r2   )pathsr0   r  r   r5   s        r%   read_multiple_filesz5test_read_multiple_files.<locals>.read_multiple_files  s0    #E44V44||G|EEEr'   r   r      r   c                 D    g | ]}                     |          j        S rq   )fieldr   rs   r   r$   s     r%   ru   z,test_read_multiple_files.<locals>.<listcomp>  s&    777!a%777r'   r/   c                 :    g | ]}                     |          S rq   )r   r  s     r%   ru   z,test_read_multiple_files.<locals>.<listcomp>  s%    $G$G$G!V]]1%5%5$G$G$Gr'   )namesmetadata)r  rh   )NT)r
   r   r   r   r   rQ   int64r   r   r   r   r   r  touchconcat_tablesr   num_columnsr   r   from_arraysschemar!  iloc)r!   nfilessizedirpath	test_datar  r   ra   r#   r   r  r   to_read	col_namesout	bad_applebad_apple_pathtr$   s                     @r%   test_read_multiple_filesr3  {  sn   FDGMMOOOIE6]]  T*** (|**28448,,Q///$$R((UD!!!T ~$$&&&F F F F ! ''F	**H==""""" !Q*Q./G7777w777I
-
3
3
3Cx##$G$G$G$Gw$G$G$G*3-3]-C $ E EH ::h M't,,,,  1---2111bqb59I|22466:::N
Y''AN#####r'   c                    d}d}| t                      z  }|                                 g }g }g }t          |          D ]}t          ||          }t	          j        ||z  |dz   |z            |_        d|j        _        |d                    |          z  }	t          j
                            |          }
t          |
|	           |                    |
           |                    |           |                    |	           t          j        |          }ddg|                                                              }t%          j        fd	|D                       }t)          j        ||           |                    t-                                                              }|j        |j        k    sJ t)          j        |                    |j                  |           d S )
NrB   r  r   rk   r  uint8stringsr/   c                      g | ]
}|         S rq   rq   )rs   rt   r0   s     r%   ru   z,test_dataset_read_pandas.<locals>.<listcomp>  s    555!G*555r'   )r
   r   r   r   rQ   rV   rk   r   r   r   r   r   r   r  r   r1   r   rX   rO   concatr   r   r   r[   r   r0   )r!   r)  r*  r+  r,  framesr  r   ra   r#   r   r5   r$   r   r0   s                 @r%   test_dataset_read_pandasr:    s   FDGMMOOOIFE6]]  T***9QXA~66,,Q///$$R((UD!!!bT((G	"G   11;;==Fy5555f55566H&(+++   W 66@@BBF<8>))))&..1A.BBHMMMMMr'   c                    | t                      z  }|                                 t          dd          }|d                    d          z  }t          j                            |          }t          ||d           t          j	        |d          }|
                                                    |          sJ d S )	Nr   r   r  r  2.6versionT)
memory_map)r
   r   r   r   r   r   r   r   r   r1   r2   r   )r!   r+  ra   r#   r   r5   s         r%   test_dataset_memory_mapr@    s     GMMOOO	!	$	$	$B\((+++DH  $$Ee,,,,D" " "G<<>>  '''''''r'   c                    | t                      z  }|                                 t          dd          }|d                    d          z  }t          j                            |          }t          ||d           t          j	        t                    5  t          j        |d           d d d            n# 1 swxY w Y   d	D ]A}t          j        ||          }|                                                    |          sJ Bd S )
Nr   r   r  r  r<  r=  i)buffer_size)   i   )r
   r   r   r   r   r   r   r   r   r~   r   r   r1   r2   r   )r!   r+  ra   r#   r   rB  r5   s          r%   #test_dataset_enable_buffered_streamrD    sQ   GMMOOO	!	$	$	$B\((+++DH  $$Ee,,,,	z	"	" & &
	& 	& 	& 	&& & & & & & & & & & & & & & & # , ,#. . .||~~$$U++++++, ,s   B<<C C c                    | t                      z  }|                                 t          dd          }|d                    d          z  }t          j                            |          }t          ||d           dD ]n}t          j	        ||          }|
                                                    |          sJ t          j        ||          }|                    |          sJ od S )	Nr   r   r  r  r<  r=  )TF)
pre_buffer)r
   r   r   r   r   r   r   r   r   r1   r2   r   r   )r!   r+  ra   r#   r   rF  r5   actuals           r%   test_dataset_enable_pre_bufferrH    s    GMMOOO	!	$	$	$B\((+++DH  $$Ee,,,,# $ $
#
, , ,||~~$$U+++++w:>>>}}U######$ $r'   r   rB   c                     g }g }t          |          D ]c}t          ||          }| d                    |          z  }|                    t	          ||                     |                    |           d|S )Nr  r  )r   r   r   r  r   )r4   r)  
file_nrowsr,  r  r   ra   r#   s           r%   _make_example_multifile_datasetrK    s    IE6]]  Za000<..q111b$//000TLr'   c                 l    d |D             }t          |          t          | j                  k    sJ d S )Nc                 P    g | ]#}t          |                                          $S rq   )r   as_posix)rs   r#   s     r%   ru   z)_assert_dataset_paths.<locals>.<listcomp>,  s(    444dS!!444r'   )r   files)r5   r  s     r%   _assert_dataset_pathsrP  +  s<    44e444Eu::W]++++++++r'   
dir_prefix_.c                    | t                      z  }|                                 t          |dd          }|d                    |          z                                   t	          j        |          }t          ||           d S )Nr   rB   r)  rJ  z	{}staging)r
   r   rK  r   r   r1   rP  r!   rQ  r+  r  r5   s        r%   test_ignore_private_directoriesrW  0  s     GMMOOO+GB78: : :E {!!*---44666((G'5)))))r'   c                    | t                      z  }|                                 t          |dd          }|dz                      d          5 }|                    d           d d d            n# 1 swxY w Y   |dz                      d          5 }|                    d           d d d            n# 1 swxY w Y   t          j        |          }t          ||           d S )Nr   rB   rU  z	.DS_Storewbs	   gibberishz.privater
   r   rK  openwriter   r1   rP  r!   r+  r  r   r5   s        r%   test_ignore_hidden_files_dotr^  A  s^   GMMOOO+GB78: : :E K
	%	%d	+	+ q	               J
	$	$T	*	* a	               ((G'5)))))$   A22A69A6B77B;>B;c                    | t                      z  }|                                 t          |dd          }|dz                      d          5 }|                    d           d d d            n# 1 swxY w Y   |dz                      d          5 }|                    d           d d d            n# 1 swxY w Y   t          j        |          }t          ||           d S )Nr   rB   rU  _committed_123rY  s   abcd_started_321rZ  r]  s        r%   #test_ignore_hidden_files_underscorerc  T  s_   GMMOOO+GB78: : :E $
$	*	*4	0	0 A	               N
"	(	(	.	. !	               ((G'5)))))r_  c                 4   | d                     |          z  t                      z  }|                    d           t          |dd          }t	          j        |          }t          ||           t	          j        |          }t          ||           d S )Nz{0}dataTparentsr   rB   rU  )r   r
   r   rK  r   r1   rP  rV  s        r%   /test_ignore_no_private_directories_in_base_pathrg  g  s    
 	((444tvv=GMM$M+GB78: : :E &&G'5))) ((G'5)))))r'   c           	         dgdz  dgdz  z   }t          j        t          j        t          t	          |                              t          j        |                                          gddg          }t          j        |t          |           dg           | dz  }|	                                 t          j        |t          |          dg           t          j
        | d	g
          }|                    |          sJ d S )Nxxxr   yyyrk   _partr   partition_cols_private_duplicate_private)ignore_prefixes)r   r   rR   r   r   dictionary_encoder   write_to_datasetr   r   r   r   )r!   partr   private_duplicater2   s        r%   test_ignore_custom_prefixesrv  z  s   7Q;%1$DH
s4yy!!""
((** w! ! !E
 s7||WIFFFF"66s#455(/y2 2 2 2 =*/ / /D ;;ur'   c                     | dz  }|                                  t          j        |          }|                                }|j        dk    sJ |j        dk    sJ d S )Nr5   r   )r   r   r1   r2   r\   r%  )r!   	empty_dirr5   r$   s       r%   test_empty_directoryry    sf    )#IOO	**G\\^^F?a""""""r'   c                 H   dd l }dd lm} dd lm}  |j        t          d          t          d          t          t          d                    t          j	        gdz  t          j
        ddd                              d	          d
          }|j                                        }ddg}	t          j                            ||dd          }
 |j        |
| |	|           t$          j                            t+          |           d          }|E|                    |d          5 } |j        |
j        |           d d d            n# 1 swxY w Y   n |j        |
j        |            |j        | |          }t5          |j        j                  }|t5          |
j        j                  k    sJ |                                }|                                }|j                                        }|	|dt=          |	          z  d          k    sJ ||         }|	D ] }||                             d          ||<   !|rJ|                    d          j         !                                }|d                             |          |d<    |j"        ||           d S )Nr   
aaabbbbccc
eefeffgeeer   
2017-01-01
2017-01-11datetime64[D]r?   datetime64[ns])group1group2numnanr   r  r  F)r'  safepreserve_indexr   _common_metadatarY  r   r   )#pandaspandas.testingtestingpyarrow.parquetparquetrP   listr   rQ   r  rV   r   r0   tolistr   r   r   rs  osr#   r   r   r[  write_metadatar'  r1   r   r   r2   rX   r   r  r   to_pandas_dtyper   )r4   r   r'  
index_namerO   r   r   	output_dfcolspartition_byoutput_tablemetadata_pathr   r5   dataset_colsinput_tableinput_dfinput_df_colscolexpected_date_types                       r%   &_test_write_to_dataset_with_partitionsr    s           |$$|$$E"IIx"}	,OLLLSS    I ##%%Dh'L8''	&u7< ( > >LBi#-/ / / / GLLY1CDDM__]D11 	6QBl11555	6 	6 	6 	6 	6 	6 	6 	6 	6 	6 	6 	6 	6 	6 	6 	,-}===b	+57 7 7G w~+,,L3|28999999,,..K$$&&H $++--M=c,.?.?)?)@)@AAAAA~H ; ;"3..z::	# I#\\&116FFHH%f-445GHH	&B)X.....s   *EEEc           
         dd l }dd lm}  |j        t	          d          t	          d          t	          t          d                    t          j        ddd                              d	          d
          }|j	        
                                }t          j                            |          }|t                      }n1t          |t                     st#          t%          |                    }d}t          |          D ]} |j        || |           t)          t+          |           dd          }	|                    |	          }
d |
D             }t/          |          |k    sJ  |j        | |                                          }|                                }|                                }||         }t9          j        ||           d S )Nr   r{  r|  r   r}  r~  r  r?   r  )r  r  r  r   rB   r   FT)allow_not_found	recursivec                 F    g | ]}|j                             d           |S )z.parquet)r#   endswith)rs   infos     r%   ru   z8_test_write_to_dataset_no_partitions.<locals>.<listcomp>  s-    MMMTdi.@.@.L.LMDMMMr'   )r  r  r  rP   r  r   rQ   rV   r   r0   r  r   r   r   r   r  r   r   r   rs  r   r   r   r   r1   r2   rX   drop_duplicatesr   r   )r4   r   rO   r   r  r  r  nr   selectorinfosoutput_filesr  r  s                 r%   $_test_write_to_dataset_no_partitionsr    s
          |$$|$$E"II	,OLLLSS 	   I ##%%D8''	22L$&&


J// =!-
";";<<
 	
A1XX 3 3L)'1	3 	3 	3 	3 	3 C	NNE&*, , ,H $$X..EMMUMMML|!!!! $"#j  
dff  $$&&H''))H~H)X.....r'   c                 >    t          t          |                      d S r)   r  r   r!   s    r%   %test_write_to_dataset_with_partitionsr    s    *3w<<88888r'   c                    t          j        t          j        dt          j                              t          j        dt          j                              t          j        dt          j                              t          j        dt          j                              t          j        dt          j        d                    g          }t          t          |           |	           d S )
Nr  )r   r  r  r  r   us)unitr'  )	r   r'  r  r<   r"  int32	timestampr  r   )r!   r'  s     r%   0test_write_to_dataset_with_partitions_and_schemar  
  s    Y	<<<	<<<RXZZ888RXZZ888bl.E.E.EFFF	H I IF
 +GV% % % % % %r'   c                 B    t          t          |           d           d S )Nr  )r  r  r  s    r%   4test_write_to_dataset_with_partitions_and_index_namer    s.    *G/ / / / / /r'   c                 >    t          t          |                      d S r)   )r  r   r  s    r%   #test_write_to_dataset_no_partitionsr    s    (W66666r'   c                 N    t          | dz             t          | dz             d S )Ntest1test2)r  r  r  s    r%   test_write_to_dataset_pathlibr     s.    *7W+<===(7):;;;;;r'   c                 .   |\  }}t          j        t          d          5  t          | dz  |           d d d            n# 1 swxY w Y   t          j        t          d          5  t	          | dz  |           d d d            d S # 1 swxY w Y   d S )Nz"path-like objects are only allowedr   r  r   r  )r   r~   r   r  r  )r!   r   r   rR  s       r%   &test_write_to_dataset_pathlib_nonlocalr  &  s6    EB	y(L	M	M	M . ..g"	. 	. 	. 	.. . . . . . . . . . . . . . . 
y(L	M	M	M . .,g"	. 	. 	. 	.. . . . . . . . . . . . . . . . . .s#   AA	A(B

BBwin32z,test fails because of unsupported characters)r   c                 2    | \  }}t          ||           d S Nr   )r  r   s      r%   *test_write_to_dataset_with_partitions_s3fsr  5  s3     HB*     r'   c                 2    | \  }}t          ||           d S r  )r  r   s      r%   (test_write_to_dataset_no_partitions_s3fsr  A  s3     HB(     r'   c                 .   t          j        dg di          }t          j                            |          }t          |           }t          j        ||t                                 t          j	        |          }|
                    |          sJ d S )Nr  r   r   )rO   rP   r   r   r   r   r   rs  r   r   r   )r!   ra   r   r#   r$   s        r%    test_write_to_dataset_filesystemr  J  s    	sIII&	'	'BH  $$Ew<<Dt0A0ABBBB]4  F==r'   d   c                    | dz  }t                      }t          j        t          j        |          t          j                            |          dddg          }t          j        	                    |          }d}t          j        ||j                  5 }t          |          D ]}|                    |           	 d d d            n# 1 swxY w Y   t          j        |          }	|	j        j        |k    sJ | dz  }
|                    t'          |
                    5 }t          j        |j        |           d d d            n# 1 swxY w Y   t          j        | |          }|S )	Nr   )rk   r.   rk   r.   r/   r   	_metadatar   )r   rO   rP   rQ   rV   r   r   r   r   r   r   ParquetWriterr'  r   r   ParquetFiler!  num_row_groupsr   r   r  r1   )r!   r{   r#   r+   ra   r   
num_groupswriterr   readerr  r   r5   s                r%   _make_dataset_for_picklingr  U  s   ^#DE	1)//!$$  "
$ 
$ 
$B H  $$EJ		$	-	- &z"" 	& 	&Au%%%%	&& & & & & & & & & & & & & & & ^D!!F?)Z7777k)M		!	!#m"4"4	5	5 +
%,***+ + + + + + + + + + + + + + + E# # #G Ns$   (CCC&EEEc                 J    fd}t          |           } ||          sJ d S )Nc                 \    |                                          |                     k    S r)   )loadsdumps)objpickle_modules    r%   is_pickleablez*test_pickle_dataset.<locals>.is_pickleables  s*    m))-*=*=c*B*BCCCCr'   )r  )r!   r  r  r5   s    `  r%   test_pickle_datasetr  q  sL    D D D D D )11G=!!!!!!!r'   c                 R   | dz  }t          j        g dg dg dd          }t          j                            |          }t          j        |t          |          ddg           t          j        |          	                                }t          j
        ||d	z             d S )
Nz
ARROW-3208)r  r   g      @r    r   g333333=@)r  r   r   r  r  r   r   )r   r   r   r   r   r   r   )onetwothreer  r  )	root_pathrn  zoutput.parquet)rO   rP   r   r   r   r   rs  r   r1   r2   r   )r!   r#   ra   r   s       r%   test_partitioned_datasetr  z  s     \!D	000,,,&&&  
 
B
 H  $$ET(-u~7 7 7 7d##((**EN5$!1122222r'   c                    | dz  }t          j        d t          d          D             dz  gdg          }t          j        d t          d          D             dz  gdg          }t          j        |t          |                     t          j        |t          |                     t          j        |dg	                                          }|d
                             d
          	                                |d
                             d
          	                                g}|d
         j
        dk    sJ |d
                             d
          |d
                             d          }}|                    |d
                   r|                    |d                   sJ d S |                    |d                   sJ |                    |d
                   sJ d S )NzARROW-3325-datasetc                 6    g | ]}t          j        d           S r   r	   randsr   s     r%   ru   z0test_dataset_read_dictionary.<locals>.<listcomp>       555qDJrNN555r'   rB   r   f0rl  c                 6    g | ]}t          j        d           S r  r  r   s     r%   ru   z0test_dataset_read_dictionary.<locals>.<listcomp>  r  r'   )r  )read_dictionaryr   r   r   )r   r   r   r   rs  r   r1   r2   chunkrr  
num_chunksr   )r!   r#   t1t2r$   	ex_chunksc0c1s           r%   test_dataset_read_dictionaryr    s   ))D	55E!HH555:;D6	J	J	JB	55E!HH555:;D6	J	J	JBc$ii0000c$ii0000tf& & &&*dff  AQ1133AQ11335I !91$$$$AY__Q!3!3B	yy1 'yy1&&&&&&&yy1&&&&&yy1&&&&&&&r'   c                    t          j        dt          j        g dt          j                              i          }t	          j        || dz             t	          j        || dz             t          j        dg          }t	          j        | dz  |          }t          j        dg di|          }|                    |          sJ t	          j        | |          }t          j        dg di|          }|                    |          sJ t	          j	        | |          }t          j        dg di|          }|
                                                    |          sJ d S )Nr   r   zdata1.parquetzdata2.parquet)r   r"  r  )r   r   r   r   r   r   )r   r   rR   r  r   r   r'  r   r   r1   r2   )r!   r   r'  r$   r   s        r%   test_read_table_schemar    sg   Hc28IIIrxzz::;<<EN5'O3444N5'O3444Y'((F ]7_4VDDDFxiii(888H==""""" ]76222Fx0001&AAAH=="""""wv666Fx0001&AAAH;;==)))))))r'   c                    t          j        t          j        g dt          j                              t          j        g dt          j                              d          }t          j        || dz             t          j        | dz  ddg          }t          j        ddg          }|j	        ddgk    sJ |j        |k    sJ d S )Nr   r   r   r   r/   )r   r  )
r   r   rR   r  r5  r   r   r   r'  r3   )r!   r   r$   expected_schemas       r%   *test_read_table_duplicate_column_selectionr    s    H28IIIrxzz::8IIIrxzz::< < = =EN5'N2333]7^3c3ZHHHFi @AAO3*,,,,=O++++++r'   c                    dd l m} | dz  }|dz  dz  dz                      d           t          j        dg d	i          }t          j        |t          |dz  dz  dz  d
z                       |                    g d          }t          j	        t          |          |          }|j
        g dk    sJ t          j        t          |          |                                          }|j
        g dk    sJ d S )Nr   test_partitioning20121001Tre  r   r   r   )yearmonthday)field_names)partitioning)r   r  r  r  )pyarrow.datasetr5   r   r   r   r   r   r   r  r   r3   r1   r2   )r!   dsr  r   rt  r$   s         r%   test_dataset_partitioningr    s>          --I$%,,T,:::Hc999%&&ENs9v%,t3nDEEG G G ??'?'?'??@@D]IT+ + +F"?"?"?????IT+ + ++/466 "?"?"???????r'   c                 :   t          j        dg di          }t          j        || dz             t	          t          |           t                                }t          j        d|          }|                                }|	                    |          sJ d S )Nr   r   r   rS  r   )
r   r   r   r   r   r   r   r1   r2   r   )r!   r   r   r5   r$   s        r%   #test_parquet_dataset_new_filesystemr    s    Hc999%&&EN5'N2333"3w<<1B1BCCJ
;;;G\\^^F==r'   c                 d   t          j        d          }|                    d          }t          j        dg di          }t          j        || dz             t          |                               dd          }t          j	        ||          }|d	z   }|j
        d
         j        |k    sJ d S )Nfsspecfiler   r   r   \r   r   z/data.parquetr   )r   importorskipr   r   r   r   r   r   replacer1   	fragmentsr#   )r!   r  r   r   r#   r5   r   s          r%   6test_parquet_dataset_partitions_piece_path_with_fsspecr    s      **F""6**JHc999%&&EN5'N2333 w<<c**D% % %G o%HQ$000000r'   c                    t          j        dg di          }| dz  }g fd}d}t          j        ||dg||           |dz  dz  |d	z  dz  |d
z  dz  h}t	          t          t          j                            }||k    sJ d S )Nr   r   r  c                 <                         | j                   d S r)   )r  r#   )written_filepaths_writtens    r%   file_visitorzDtest_parquet_write_to_dataset_exposed_keywords.<locals>.file_visitor  s     \./////r'   zpart-{i}.parquet)r  r  basename_template1zpart-0.parquet23)r   r   r   rs  r   rx   pathlibPath)r!   r   r#   r  r  expected_pathspaths_written_setr  s          @r%   .test_parquet_write_to_dataset_exposed_keywordsr    s    Hc999%&&E^#DM0 0 0 0 0 +t3%%1*;= = = =
 	s
%%s
%%s
%%N
 Cm<<==......r'   write_dataset_kwarg))r   T)r   Fc                    ddl m} t          j        dg di          }| dz  }t	          j        |j                  }|\  }}|t	          j        t          j                  j	        vsJ ||j	        v sJ t          j                            |dd          5 }t          j        ||fi ||i |j        d         \  }	}
}||         |k    sJ 	 ddd           dS # 1 swxY w Y   dS )	zEVerify kwargs in pq.write_to_dataset are passed onto ds.write_datasetr   Nr   r   zout.parquetwrite_datasetT)autospec)r  r5   r   r   inspect	signaturer  r   rs  
parametersmockpatchrU   
mock_calls)r!   r  r  r   r#   r"  keyargmock_write_dataset_name_argsr   s               r%   #test_write_to_dataset_kwargs_passedr,    sV    !     Hc999%&&E]"D!""233I"HC g'(;<<GGGGG)&&&&&			2		>	> "!
E466C:6661<Q?ufc{c!!!!!	" " " " " " " " " " " " " " " " " "s   5CC Cc                 B   t          j        t          j        g dg d          g dd          }t          j        |          }| dz  }t          j        || dz  dg           d	 |                                D             }t          |          d
k    sJ d|vsJ d S )N)r   r9   r   r8   r   r   )catr  r5   r.  rm  c                 D    g | ]}|                                 |j        S rq   )is_dirr   )rs   r   s     r%   ru   z;test_write_to_dataset_category_observed.<locals>.<listcomp>8  s'    <<<!<qv<<<r'   r   zcat=c)	rO   rP   r   r   r   r   rs  iterdirr   )r!   ra   r   r#   subdirss        r%   'test_write_to_dataset_category_observedr3  *  s    
 
~ooo///JJJyy  
 
B HRLLEYDw"E7    =<t||~~<<<Gw<<1'!!!!!!r'   )r   rB   )NNNr)   )r  )mr   r!  r  r  sysnumpyrQ   ImportErrorr   unittest.mockr$  pyarrowr   pyarrow.computecomputepcr   r   r   r   r   r   r   pyarrow.testsr	   pyarrow.utilr
   r  r  r   pyarrow.tests.parquet.commonr   r   r   r  rO   r  r  r   markr5   
pytestmarkr&   r,   r6   re   r}   xfailr   AssertionErrorr   r   r   r   r   r   parametrizer  castr"  r   r   s3r   r   r   r   r*   rW   r   r  r3  r:  r@  rD  rH  rK  rP  rW  r^  rc  rg  rv  ry  r  r  r  r  r  r  r  r  skipifplatformr  r  r  r  r  r  r  r  r  r  r  r  r  r,  r3  rq   r'   r%   <module>rH     s
  $   				  



   	BBB                 H H H H H H H H H H H H H H H H                  4 4 4 4 4 4 4 4 4 4 4   	BBB   NB k!6;#67
     & 3 3 3
 	- 	- 	- C, C, C,L ! ! !B  ~&B	    '1 '1  '1T = = =  ! ! !B +4 +4 +4\ %, %, %,P N N N. /0012""(:..2""(8S11A5""(8S1166xrxzzBBQF	HI I (EFF  GFI I 4 A A A.                   - -  -
$2 $2 $2N+" +" +"\/ / /  C C C* 5$ 5$ 5$N "N "N "NJ ( ( ( , , ,& $ $ $"	 	 	 	, , ,
 Sz22* * 32 * * * *$ * * *$ Sz22* * 32 *"  *# # # 7;266::/ :/ :/ :/| 59+/ +/ +/ +/\ 9 9 9 % % % / / /
 7 7 7 < < <
 
. 
.  
. CLG+I  K K K K  
             8 " " " 3 3 3 ' ' '.* * *.
, 
, 
,@ @ @0     1 1 1"/ / /0 . 1  " "	 "* " " " " "s0    %%A* *A43A48
B 	BB