
    z-PhR                        d dl Z d dlZd dlmZ d dlZd dlZd dlZd dlZd dlZ	d dlm
Z
 d dlmZ ej        j        Z	 d dlmZ d dlZn# e$ r Y nw xY w ej        d          d             Zd	 Zdd
ZddZej        j        ej                            dg d          d                         Zd Zd Zd Zd Zd Z d Z!d Z"ej        j#        d             Z$d Z%d Z&d Z'd Z(dS )    N)Path)fs)util)assert_frame_equalmodule)scopec                     | dz  S )Norc )base_datadirs    V/var/www/html/test/jupyter/venv/lib/python3.11/site-packages/pyarrow/tests/test_orc.pydatadirr   ,   s    %    c                 @   |D ]}||         }| |         }|dk    rPd |D             rD||                                          }t          |          D ]\  }}d |D             ||<   |||<   i|d         j        }t          |t          j                  rt          j        |          }nt          |t          j                  r|j        j        }n|t          j
        u rdgt          |          z  }	t          t          ||                    D ]u\  }\  }
}t          j        |          sY|
                                j        }d| z  }t          j
        t!          ||z                                          |          |	|<   vt          j        |	          }|||<   dS )z_
    Fix type of expected values (as read from JSON) according to
    actual ORC datatype.
    mapc                 L    g | ]!}|D ]}|                                 d dhk    "S keyvalue)keys).0mds      r   
<listcomp>z&fix_example_values.<locals>.<listcomp>:   s7    KKK!KKAeW--KKKKr   c                 .    g | ]}|d          |d         fS r   r   )r   r   s     r   r   z&fix_example_values.<locals>.<listcomp>>   s%    <<<Q1U8QwZ0<<<r   r   N
   )copy	enumerate	__class__
issubclassdatetimepdto_datetimedatedtdecimalDecimallenzipisnullas_tupleexponentroundscalebSeries)actual_colsexpected_colsnameexpectedactualcolir   typconverted_decimalsr   vexpfactors                 r   fix_example_valuesr<   1   s   
  ' ' &T"EMMKKxKKK   %**,,C!(++ = =1<<!<<<A"%M$Qi!c8,-- 	5~h//HHX]++ 	5{'HHGO##"&#h--!7&s68'<'<== H H	6Aqy|| H**,,/CC4ZFa&j(9(9::AA#FF 'q)y!344H&d=' 'r   c                 n    |||||                              d          }t          | |d           d S )NT)dropF)check_dtype)reset_indexr   )orc_dfexpected_dfstartstops       r   check_example_valuesrE   W   sG    D,!%*-99t9DDv{>>>>>>r   Fc           
      H   ddl m} |                    |           }|                                }t	          |t
          j                  sJ |                                 t          j	        |
                                          }t          |j                  t          |j                  k    sJ |j                            |j                  s|                    |j                  }|rt          ||           t!          ||           d}t#          |j                  D ]p}|                    |          }	t!          t          j	        |	
                                          |||t)          |	          z              |t)          |	          z  }q||j        k    sJ dS )zC
    Check a ORC file against the expected columns dictionary.
    r   r
   columns)rC   rD   N)pyarrowr
   ORCFileread
isinstancepaTablevalidater"   	DataFrame	to_pydictsetrI   equalsreindexr<   rE   rangenstripesread_striper(   nrows)
orc_pathrB   need_fixr
   orc_filetablerA   json_posr6   batchs
             r   check_example_filer`   ]   s    {{8$$HMMOOEeRX&&&&&	NN \%//++,,F{"##s6>':'::::: >  !455 B!))&.)AA 06;///---H8$%%  $$Q''R\%//*;*;<<(#+"*SZZ"7	9 	9 	9 	9 	CJJx~%%%%%%r   filename)zTestOrcFile.test1.orcTestOrcFile.testDate1900.orczdecimal.orcc                     || z  }t          j        t          |                    d                    d          }t	          ||d           dS )z
    Check a ORC file example against the equivalent JSON file, as given
    in the Apache ORC repository (the JSON file has one JSON object per
    line, corresponding to one row in the ORC file).
    z.jsn.gzT)lines)r[   N)r"   	read_jsonstrwith_suffixr`   )ra   r   pathr]   s       r   test_example_using_jsonri      sS     XDLT--i8899FFFEtUT222222r   c                 |    | dz  }dt          |          d}t          j        t          j        d|gd           d S )Nrb   zif 1:
        import os
        os.environ['TZDIR'] = '/tmp/non_existent'

        from pyarrow import orc
        try:
            orc_file = orc.ORCFile(z)
            orc_file.read()
        except Exception as e:
            assert "time zone database" in str(e).lower(), e
        else:
            assert False, "Should have raised exception"
    -cTcheck)rf   
subprocessrunsys
executable)r   rh   codes      r   test_timezone_database_absentrs      sV     33D %(II  D NCND$/t<<<<<<r   c                    t          d          }|                                st          j        d|            t          |dz            }	 t	          j        ||d           n.# t          $ r!}t          j        d|            Y d }~nd }~ww xY w|dz  dz                      d	           | d
z  }dt          |          dt          |          d}t          j
        t          j        d|gd           d S )Nz/usr/share/zoneinfoz Test needs timezone database in zoneinfoT)symlinksz"Failed to copy timezone database: USPacific)
missing_okrb   z6if 1:
        import os
        os.environ['TZDIR'] = zA

        from pyarrow import orc
        orc_file = orc.ORCFile(z)
        try:
            orc_file.read()
        except Exception as e:
            assert "zoneinfo/US/Pacific" in str(e), e
        else:
            assert False, "Should have raised exception"
    rk   rl   )r   existspytestskipshutilcopytreeOSErrorunlinkrf   rn   ro   rp   rq   )r   tmpdirsource_tzdirtzdirerh   rr   s          r   test_timezone_absentr      s=    -..L   GE|EEFFF*$%%E>ed;;;;; > > ><<<========>
T\I%%%66633D"5zz 
 !$D		  D NCND$/t<<<<<<s   A& &
B0BBc                 R   ddl m} |                    | dz                                            }|j        dk    sJ t          j        dt          j                    fdt          j                    fdt          j	                    fdt          j
                    fdt          j                    fd	t          j                    fd
t          j                    fdt          j                    fdt          j                    fdt          j        dt          j        t          j        dt          j
                    fdt          j                    fg                    fg          fdt          j        t          j        dt          j
                    fdt          j                    fg                    fdt          j        t          j                    t          j        dt          j
                    fdt          j                    fg                    fg          }|j        |k    sJ d S )Nr   rG   zTestOrcFile.emptyFile.orcboolean1byte1short1int1long1float1double1bytes1string1middlelistr   )rJ   r
   rK   rL   num_rowsrN   schemabool_int8int16int32int64float32float64binarystringstructlist_map_)r   r
   r]   expected_schemas       r   test_orcfile_emptyr      s   KK"==>>CCEEE>Qi	RXZZ 	"'))	28::		"(**	2:<< 	BJLL!	29;;	BIKK 	29bh	FBHJJ/%ry{{35 6 67 7 8   	
 
I

+!29;;/1 2 2
 
 	 
		FBHJJ#7$-ry{{#;#= > >  	'!  O0 <?******r   c                 >   ddl m} t          j        dg di          }| dz  }|                                 |dz  }|                    |t          |                     |                    |t          j	                              }|
                    |          sJ |                    dt          j        |                     }|
                    |          sJ |                    t          j        |                    }|
                    |          sJ d S )	Nr   rG   a         data_dirzdata.orc)
filesystemzdata_dir/data.orc)rJ   r
   rN   r]   mkdirwrite_tablerf   
read_tabler   LocalFileSystemrT   r   _filesystem_uri)r   r
   r]   	directoryrh   results         r   test_filesystem_urir      s'   Hc999%&&E#IOOz!DOOE3t99%%% ^^DR-?-A-A^BBF== ^^(<V(D(D  F FF== ^^T""$ $F==r   c                 
   ddl m} t          j        g d          }t          j        g d          }t          j        ||d          }|                     d          }|                    ||           |                    |          }|                    |          sJ |                    |g           }d|j	        k    sJ d|j
        k    sJ |                    |dg	          }d|j	        k    sJ d
|j
        k    sJ d S )Nr   rG   r   Nr   NNArrowNORCr   utf8test.orc   r   rH   r   )rJ   r
   rN   arrayr]   joinr   r   rT   r   num_columns)r   r
   r   br]   fileoutput_tables          r   test_orcfile_readwriter      s%   
###$$A
---..AHq!,,--E;;z""DOOE4   >>$''L<<%%%%%>>$++L%%%%%(((((>>$	>::L%%%%%(((((((r   c                     ddl m}  ddlm}  |            }t	          j        g d          }t	          j        g d          }t	          j        ||d          }|                     ||           |                    d           | 	                    |          }|
                                }|                    |          sJ d S )Nr   rG   )BytesIOr   r   r   )rJ   r
   ior   rN   r   r]   r   seekrK   rL   rT   )r
   r   bufr   r   r]   r\   r   s           r   test_bytesio_readwriter     s    
'))C
###$$A
---..AHq!,,--EOOE3HHQKKK{{3H==??L<<%%%%%%%r   c                     ddl m}  t          j                    }t          j        g d          }t          j        g d          }t          j        ||d          }|                     ||           t          j        |                                          }| 	                    |          }|
                                }|                    |          sJ |j        dk    sJ |j        dk    sJ |j        dk    sJ |j        d	k    sJ t          j                    }t!          j        t$                    5  |                     ||           d d d            n# 1 swxY w Y   t          j        |                                          }| 	                    |          }|
                                }|                    |          sJ |j        dk    sJ |j        dk    sJ |j        dk    sJ |j        d	k    sJ d S )
Nr   rG   r   r   r   UNCOMPRESSEDz0.12i'     rJ   r
   rN   BufferOutputStreamr   r]   r   BufferReadergetvaluerK   rL   rT   compressionfile_versionrow_index_stridecompression_sizer{   warnsFutureWarningr
   buffer_output_streamr   r   r]   buffer_readerr\   r   s           r   test_buffer_readwriter   "  sP   022
###$$A
---..AHq!,,--EOOE/000O$8$A$A$C$CDDM{{=))H==??L<<%%%%%>1111 F****$----$---- 022	m	$	$ 5 5,e4445 5 5 5 5 5 5 5 5 5 5 5 5 5 5O$8$A$A$C$CDDM{{=))H==??L<<%%%%%>1111 F****$----$------s   9EE #E c            	         ddl m}  t          j                    }t          j        g d          }t          j        g d          }t          j        ||d          }|                     ||dddd	
           t          j        |                                          }| 	                    |          }|
                                }|                    |          sJ |j        dk    sJ |j        dk    sJ |j        dk    sJ |j        d	k    sJ t          j                    }t!          j        t$                    5  |                     ||dddd	
           d d d            n# 1 swxY w Y   t          j        |                                          }| 	                    |          }|
                                }|                    |          sJ |j        dk    sJ |j        dk    sJ |j        dk    sJ |j        d	k    sJ d S )Nr   rG   r   r   r   snappyz0.11i  r   )r   r   r   compression_block_sizeSNAPPYuncompressedi N  r   r   r   s           r   'test_buffer_readwrite_with_writeoptionsr   C  s   022
###$$A
---..AHq!,,--EOO$     O$8$A$A$C$CDDM{{=))H==??L<<%%%%%8++++ F****$,,,,$---- 022	m	$	$ 
 
 &"#( 	 	
 	
 	

 
 
 
 
 
 
 
 
 
 
 
 
 
 
 O$8$A$A$C$CDDM{{=))H==??L<<%%%%%>1111 F****$----$------s   >E&&E*-E*c                     ddl m}  t          j                    }t          j        g d          }t          j        d|i          }t          j        t                    5  | 	                    ||d           d d d            n# 1 swxY w Y   t          j        t                    5  | 	                    ||d           d d d            n# 1 swxY w Y   t          j        t                    5  | 	                    ||d           d d d            n# 1 swxY w Y   t          j        t                    5  | 	                    ||d	           d d d            n# 1 swxY w Y   t          j        t                    5  | 	                    ||d
	           d d d            n# 1 swxY w Y   t          j        t                    5  | 	                    ||d           d d d            n# 1 swxY w Y   t          j        t                    5  | 	                    ||d           d d d            n# 1 swxY w Y   t          j        t                    5  | 	                    ||d           d d d            n# 1 swxY w Y   t          j        t                    5  | 	                    ||d           d d d            n# 1 swxY w Y   t          j        t                    5  | 	                    ||d           d d d            n# 1 swxY w Y   t          j        t                    5  | 	                    ||d           d d d            n# 1 swxY w Y   t          j        t                    5  | 	                    ||d           d d d            n# 1 swxY w Y   t          j        t                    5  | 	                    ||d           d d d            n# 1 swxY w Y   t          j        t                    5  | 	                    ||d           d d d            n# 1 swxY w Y   t          j        t                    5  | 	                    ||d           d d d            n# 1 swxY w Y   t          j        t                    5  | 	                    ||d           d d d            n# 1 swxY w Y   t          j        t                    5  | 	                    ||d           d d d            n# 1 swxY w Y   t          j        t                    5  | 	                    ||d           d d d            n# 1 swxY w Y   t          j        t                    5  | 	                    ||d           d d d            n# 1 swxY w Y   t          j        t                    5  | 	                    ||d           d d d            n# 1 swxY w Y   t          j        t                    5  | 	                    ||d           d d d            n# 1 swxY w Y   t          j        t                    5  | 	                    ||d           d d d            n# 1 swxY w Y   t          j        t                    5  | 	                    ||d           d d d            n# 1 swxY w Y   t          j        t                    5  | 	                    ||d           d d d            n# 1 swxY w Y   t          j        t                    5  | 	                    ||d !           d d d            n# 1 swxY w Y   t          j        t                    5  | 	                    ||dd"g!           d d d            n# 1 swxY w Y   t          j        t                    5  | 	                    ||h d#!           d d d            n# 1 swxY w Y   t          j        t                    5  | 	                    ||d$           d d d            n# 1 swxY w Y   t          j        t                    5  | 	                    ||d%$           d d d            n# 1 swxY w Y   t          j        t                    5  | 	                    ||d&$           d d d            d S # 1 swxY w Y   d S )'Nr   rG   r   r   )
batch_sizeigR @gp=
ף?)r   z1.1)stripe_sizeipgG @)r   nonezlid)r   i8gR"@)compression_strategynolarge)r   igGz0@cat)padding_tolerancearrow)dictionary_key_size_thresholdg333333?g	r   )bloom_filter_columnsgffffff?>   r   r   )bloom_filter_fppg?g)rJ   r
   rN   r   r   r]   r{   raises
ValueErrorr   	TypeError)r
   r   r   r]   s       r   +test_buffer_readwrite_with_bad_writeoptionsr   s  s   022
###$$AHgq\""E 
z	"	" 
 
  	 	
 	
 	

 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
z	"	" 
 
  	 	
 	
 	

 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
z	"	" 
 
  	 	
 	
 	

 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
z	"	" 
 
  	 	
 	
 	

 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
z	"	" 
 
  	 	
 	
 	

 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
z	"	" 
 
  	 	
 	
 	

 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
z	"	" 
 
  	 	
 	
 	

 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
z	"	" 
 
  	 	
 	
 	

 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
y	!	! 
 
  	 	
 	
 	

 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
z	"	" 
 
  	 	
 	
 	

 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
z	"	" 
 
  	 	
 	
 	

 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
z	"	" 
 
 #$ 	 	
 	
 	

 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
z	"	" 
 
 #' 	 	
 	
 	

 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
z	"	" 
 
 #* 	 	
 	
 	

 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
y	!	! 
 
 !" 	 	
 	
 	

 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
z	"	" 
 
 !% 	 	
 	
 	

 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
z	"	" 
 
 !( 	 	
 	
 	

 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
z	"	" 
 
  	 	
 	
 	

 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
z	"	" 
 
 ! 	 	
 	
 	

 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
z	"	" 
 
 $ 	 	
 	
 	

 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
z	"	" 
 
 # 	 	
 	
 	

 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
z	"	" 
 
 *1 	 	
 	
 	

 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
z	"	" 
 
 *- 	 	
 	
 	

 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
z	"	" 
 
 *. 	 	
 	
 	

 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
z	"	" 
 
 !) 	 	
 	
 	

 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
z	"	" 
 
 "#S 	 	
 	
 	

 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
z	"	" 
 
 !+ 	 	
 	
 	

 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
z	"	" 
 
 $ 	 	
 	
 	

 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
z	"	" 
 
   	 	
 	
 	

 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
z	"	" 
 
 ! 	 	
 	
 	

 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
s  BBB(CCC1DDD:EE#&E#F((F,/F,G11G58G5H::H>H>JJ
J'KKK0LLL9MM"%M"N''N+.N+O00O47O4P99P= P=RR	R&SSS/TTT8UU!$U!V&&V*-V*
W//W36W3X88X<?X<ZZZ%[

[[.\\\7]] #]  ^''^+.^+_22_69_6`;;`?`?bbb(cccc                 r
   ddl m} t          j        dt          j                              }t          j        dt          j        |g                    }t          j        dt          j                              t          j        dt          j        t          j        dt          j                                                  t          j        dt          j        |t          j        d	t          j                              g                    t          j        d
t          j        t          j        dt          j        t          j        dt          j                              t          j        d	t          j                              g                                        t          j        dt          j                              g}dgddggddiddgddddddggdgg}t          j        |t          j	        |                    }t          | dz            }|                    ||           |                    |          }|                                }	|	                    |          sJ |                    ddg          }
|
                    |                    ddg                    sJ |                    g d          }|                    |                    g d                    sJ |                    dg          }t          j        ddddiigi          }|                    |          sJ |                    dg          }t          j        dd	digi          }|                    |          sJ |                    g d          }|                    |                    ddg                    sJ |                    dg          }t          j        d
ddiddiggi          }|                    |          sJ |                    ddg          }
|
                    |                    ddg                    sJ |                    g d           }|                    |                    g d!                    sJ t!          j        t$                    5  |                    d"g           d d d            n# 1 swxY w Y   t!          j        t&                    5  |                    dg           d d d            d S # 1 swxY w Y   d S )#Nr   rG   innerr   basicr   itemr   inner2list-structinner1basic2r   r   r   r   )r   r         )r   r         	   )r   r   rH   )r   r   r   struct.middle.innerstruct.inner2)r   r   r   zlist-struct.inner1r   )r   r   r   wrong)rJ   r
   rN   fieldr   r   r   r   r]   r   rf   r   rK   rL   rT   selectr{   r   IOErrorr   )tempdirr
   r   r   fieldsarrsr]   rh   r\   result1result2result3result4	expected4result5	expected5result6result7	expected7s                      r   test_column_selectionr  U  sh    HWbhjj))EXh	5' 2 233F
"(**%%
BHRXfbhjj99::	
 	
 	bi(BHJJ)G)G HII	
 	
 	28BH	HXrxzz22HXrxzz22#  % %  	
 	
 	28::&&!F& 
q!fXGQ<1==>!
$
$a&@&@	ABQCID HT")F"3"3444Ew#$$DOOE4   {{4  H mmooG>>%      mmWh$7m88G>>%,,':;;<<<<<mm$@$@$@mAAG>>%,,'C'C'CDDEEEEE mm%:$;m<<G(h!%=$>?@@I>>)$$$$$mm_$5m66G(h]O455I>>)$$$$$mm@@@   G >>%,,'9::;;;;;mm%9$:m;;G-Ha=8Q-*H)IJKKI>>)$$$$$ mmQFm++G>>%,,':;;<<<<<mmIIIm..G>>%,,'H'H'HIIJJJJJ 
w		 ) ) 	wi((() ) ) ) ) ) ) ) ) ) ) ) ) ) )
 
z	"	" # #qc"""# # # # # # # # # # # # # # # # # #s$   ?S##S'*S'T,,T03T0c                    ddl m} t          | dz            }|                    |          5 }t	          j        t                    5  |                                 d d d            n# 1 swxY w Y   d d d            d S # 1 swxY w Y   d S )Nr   rG   r   )rJ   r
   rf   	ORCWriterr{   r   AttributeErrortest)r  r
   rh   writers       r   test_wrong_usage_orc_writerr    s   w#$$D	t		 ]>** 	 	KKMMM	 	 	 	 	 	 	 	 	 	 	 	 	 	 	                 s5   BA)B)A-	-B0A-	1BBBc                 V   ddl m} t          | dz            }t          j        g d          }t          j        g d          }t          j        ||d          }t          j        t          j                  5  |	                    ||           d d d            d S # 1 swxY w Y   d S )Nr   rG   r   r   )NNNNr   )
rJ   r
   rf   rN   r   r]   r{   r   ArrowNotImplementedErrorr   )r  r
   rh   r   r   r]   s         r    test_orc_writer_with_null_arraysr    s    w#$$D
###$$A
)))**AHq!,,--E	r2	3	3 % %t$$$% % % % % % % % % % % % % % % % % %s   :BB"%B")NN)F))r&   r!   pathlibr   r}   rn   rp   r{   rJ   rN   r   pyarrow.testsr   markr
   
pytestmarkpandas.testingr   pandasr"   ImportErrorfixturer   r<   rE   r`   parametrizeri   rs   r   r   r   r   r   r   r   r   r   r  r  r  r   r   r   <module>r%     sc  $              



                 
 [_
	111111 	 	 	D	 h      #' #' #'L? ? ? ?"& "& "& "&J  & & &  
	3 	3  	3= = =(= = =>+ + +B     0) ) )&& & &. . .B ,. ,. ,.^_
 _
 _
DL# L# L#^  % % % % %s   
A AA