
    y-PhJ1                         d dl mZ d dlZd dlmZ d dlmZ d dlmZ  G d d          Z	dZ
 G d d	          Zdd
Zde_        dddddddddddddZd                    e
          e_        dS )    )IntegralN)Table)_resolve_filesystem_and_pathc                      e Zd ZdZd Zed             Zed             Zed             Zed             Z	ed             Z
ed             Zed	             Zed
             Zed             Zed             Zed             Zed             Zed             Zed             Zed             Zed             Zed             ZddZddZddZdS )ORCFilea  
    Reader interface for a single ORC file

    Parameters
    ----------
    source : str or pyarrow.NativeFile
        Readable source. For passing Python file objects or byte buffers,
        see pyarrow.io.PythonFileInterface or pyarrow.io.BufferReader.
    c                 j    t          j                    | _        | j                            |           d S N)_orc	ORCReaderreaderopen)selfsources     K/var/www/html/test/jupyter/venv/lib/python3.11/site-packages/pyarrow/orc.py__init__zORCFile.__init__&   s-    n&&         c                 4    | j                                         S )z/The file metadata, as an arrow KeyValueMetadata)r   metadatar   s    r   r   zORCFile.metadata*        {##%%%r   c                 4    | j                                         S )z#The file schema, as an arrow schema)r   schemar   s    r   r   zORCFile.schema/   s     {!!###r   c                 4    | j                                         S )zThe number of rows in the file)r   nrowsr   s    r   r   zORCFile.nrows4   s     {  """r   c                 4    | j                                         S )z!The number of stripes in the file)r   nstripesr   s    r   r   zORCFile.nstripes9   r   r   c                 4    | j                                         S )z4Format version of the ORC file, must be 0.11 or 0.12)r   file_versionr   s    r   r   zORCFile.file_version>   s     {'')))r   c                 4    | j                                         S )z2Software instance and version that wrote this file)r   software_versionr   s    r   r    zORCFile.software_versionC        {++---r   c                 4    | j                                         S )zCompression codec of the file)r   compressionr   s    r   r#   zORCFile.compressionH        {&&(((r   c                 4    | j                                         S )z?Number of bytes to buffer for the compression codec in the file)r   compression_sizer   s    r   r&   zORCFile.compression_sizeM   r!   r   c                 4    | j                                         S )z{Name of the writer that wrote this file.
        If the writer is unknown then its Writer ID
        (a number) is returned)r   writerr   s    r   r(   zORCFile.writerR   s    
 {!!###r   c                 4    | j                                         S )zVersion of the writer)r   writer_versionr   s    r   r*   zORCFile.writer_versionY        {))+++r   c                 4    | j                                         S )zRNumber of rows per an entry in the row index or 0
        if there is no row index)r   row_index_strider   s    r   r-   zORCFile.row_index_stride^   s     {++---r   c                 4    | j                                         S )zNumber of stripe statistics)r   nstripe_statisticsr   s    r   r/   zORCFile.nstripe_statisticsd        {--///r   c                 4    | j                                         S )z/Length of the data stripes in the file in bytes)r   content_lengthr   s    r   r2   zORCFile.content_lengthi   r+   r   c                 4    | j                                         S )z<The number of compressed bytes in the file stripe statistics)r   stripe_statistics_lengthr   s    r   r4   z ORCFile.stripe_statistics_lengthn   s     {33555r   c                 4    | j                                         S )z1The number of compressed bytes in the file footer)r   file_footer_lengthr   s    r   r6   zORCFile.file_footer_lengths   r0   r   c                 4    | j                                         S )z*The number of bytes in the file postscript)r   file_postscript_lengthr   s    r   r8   zORCFile.file_postscript_lengthx   s     {11333r   c                 4    | j                                         S )zThe number of bytes in the file)r   file_lengthr   s    r   r:   zORCFile.file_length}   r$   r   Nc                 :   |d S | j         }g }|D ]}t          |t                    rpt          |          }d|cxk    rt	          |          k     r&n n#||         j        }|                    |           ft          dt	          |          |fz            |c S |S )Nr   z/Column indices must be in 0 <= ind < %d, got %d)r   
isinstancer   intlennameappend
ValueError)r   columnsr   namescols        r   _select_nameszORCFile._select_names   s    ?4 
	 
	C#x(( 	#hh))))c&kk))))) +*CLL%%%%$ &/25f++s1C&D E E E r   c                 d    |                      |          }| j                            ||          S )a  Read a single stripe from the file.

        Parameters
        ----------
        n : int
            The stripe index
        columns : list
            If not None, only these columns will be read from the stripe. A
            column name may be a prefix of a nested field, e.g. 'a' will select
            'a.b', 'a.c', and 'a.d.e'

        Returns
        -------
        pyarrow.RecordBatch
            Content of the stripe as a RecordBatch.
        rB   )rE   r   read_stripe)r   nrB   s      r   rH   zORCFile.read_stripe   s2    " $$W--{&&q'&:::r   c                 b    |                      |          }| j                            |          S )a  Read the whole file.

        Parameters
        ----------
        columns : list
            If not None, only these columns will be read from the file. A
            column name may be a prefix of a nested field, e.g. 'a' will select
            'a.b', 'a.c', and 'a.d.e'. Output always follows the
            ordering of the file and not the `columns` list.

        Returns
        -------
        pyarrow.Table
            Content of the file as a Table.
        rG   )rE   r   read)r   rB   s     r   rK   zORCFile.read   s0      $$W--{000r   r	   )__name__
__module____qualname____doc__r   propertyr   r   r   r   r   r    r#   r&   r(   r*   r-   r/   r2   r4   r6   r8   r:   rE   rH   rK    r   r   r   r      s4        ! ! ! & & X& $ $ X$ # # X# & & X& * * X* . . X. ) ) X) . . X. $ $ X$ , , X, . . X.
 0 0 X0 , , X, 6 6 X6 0 0 X0 4 4 X4 ) ) X)   (; ; ; ;(1 1 1 1 1 1r   r   a  file_version : {"0.11", "0.12"}, default "0.12"
    Determine which ORC file version to use.
    `Hive 0.11 / ORC v0 <https://orc.apache.org/specification/ORCv0/>`_
    is the older version
    while `Hive 0.12 / ORC v1 <https://orc.apache.org/specification/ORCv1/>`_
    is the newer one.
batch_size : int, default 1024
    Number of rows the ORC writer writes at a time.
stripe_size : int, default 64 * 1024 * 1024
    Size of each ORC stripe in bytes.
compression : string, default 'uncompressed'
    The compression codec.
    Valid values: {'UNCOMPRESSED', 'SNAPPY', 'ZLIB', 'LZ4', 'ZSTD'}
    Note that LZ0 is currently not supported.
compression_block_size : int, default 64 * 1024
    Size of each compression block in bytes.
compression_strategy : string, default 'speed'
    The compression strategy i.e. speed vs size reduction.
    Valid values: {'SPEED', 'COMPRESSION'}
row_index_stride : int, default 10000
    The row index stride i.e. the number of rows per
    an entry in the row index.
padding_tolerance : double, default 0.0
    The padding tolerance.
dictionary_key_size_threshold : double, default 0.0
    The dictionary key size threshold. 0 to disable dictionary encoding.
    1 to always enable dictionary encoding.
bloom_filter_columns : None, set-like or list-like, default None
    Columns that use the bloom filter.
bloom_filter_fpp : double, default 0.05
    Upper limit of the false-positive rate of the bloom filter.
c                   z    e Zd Zd                    e          ZdZddddddd	d
d
ddddZd Zd Z	d Z
d Zd ZdS )	ORCWritera  
Writer interface for a single ORC file

Parameters
----------
where : str or pyarrow.io.NativeFile
    Writable target. For passing Python file objects or byte buffers,
    see pyarrow.io.PythonFileInterface, pyarrow.io.BufferOutputStream
    or pyarrow.io.FixedSizeBufferWriter.
{}
F0.12      uncompressed   speed'          N皙?r   
batch_sizestripe_sizer#   compression_block_sizecompression_strategyr-   padding_tolerancedictionary_key_size_thresholdbloom_filter_columnsbloom_filter_fppc                    t          j                    | _        | j                            |||||||||	|
||           d| _        d S )Nr]   T)r
   rS   r(   r   is_open)r   wherer   r^   r_   r#   r`   ra   r-   rb   rc   rd   re   s                r   r   zORCWriter.__init__   sc     n&&%!###9!5-/*G!5- 	 	
 	
 	
 r   c                 .    |                                   d S r	   closer   s    r   __del__zORCWriter.__del__      

r   c                     | S r	   rQ   r   s    r   	__enter__zORCWriter.__enter__  s    r   c                 .    |                                   d S r	   rj   )r   argskwargss      r   __exit__zORCWriter.__exit__  rm   r   c                 L    | j         sJ | j                            |           dS )a
  
        Write the table into an ORC file. The schema of the table must
        be equal to the schema used when opening the ORC file.

        Parameters
        ----------
        table : pyarrow.Table
            The table to be written into the ORC file
        N)rg   r(   write)r   tables     r   ru   zORCWriter.write  s.     |%     r   c                 X    | j         r"| j                                         d| _         dS dS )z$
        Close the ORC file
        FN)rg   r(   rk   r   s    r   rk   zORCWriter.close#  s7     < 	!K DLLL	! 	!r   )rL   rM   rN   format_orc_writer_args_docsrO   rg   r   rl   ro   rs   ru   rk   rQ   r   r   rS   rS      s        
 F !!  G % -+(-&-"'#&/2&*"&    <      ! ! !! ! ! ! !r   rS   c                 4   t          | |          \  }}||                    |          } |Ht          |          dk    r5t          |                                                               |          }n#t          |                               |          }|S )Nr   rG   )r   open_input_filer>   r   rK   select)r   rB   
filesystempathresults        r   
read_tabler   ,  s    3FJGGJ++D11s7||q00%%''..w77%%g%66Mr   a  
Read a Table from an ORC file.

Parameters
----------
source : str, pyarrow.NativeFile, or file-like object
    If a string passed, can be a single file name. For file-like objects,
    only read a single file. Use pyarrow.BufferReader to read a file
    contained in a bytes or buffer-like object.
columns : list
    If not None, only these columns will be read from the file. A column
    name may be a prefix of a nested field, e.g. 'a' will select 'a.b',
    'a.c', and 'a.d.e'. Output always follows the ordering of the file and
    not the `columns` list. If empty, no columns will be read. Note
    that the table will still have the correct num_rows set despite having
    no columns.
filesystem : FileSystem, default None
    If nothing passed, will be inferred based on path.
    Path will try to be found in the local on-disk filesystem otherwise
    it will be parsed as an URI to determine the filesystem.
rT   rU   rV   rW   rX   rY   rZ   r[   r\   r]   c                   t          |t                    r t          j        dt          d           || }} t          |||||||||	|
||          5 }|                    |            d d d            d S # 1 swxY w Y   d S )NzThe order of the arguments has changed. Pass as 'write_table(table, where)' instead. The old order will raise an error in the future.   )
stacklevelr]   )r<   r   warningswarnFutureWarningrS   ru   )rv   rh   r   r^   r_   r#   r`   ra   r-   rb   rc   rd   re   r(   s                 r   write_tabler   P  s     % $&'4	
 	
 	
 	

 eu	!51)+&C1)
 
 
  
U                 s   A44A8;A8a]  
Write a table into an ORC file.

Parameters
----------
table : pyarrow.lib.Table
    The table to be written into the ORC file
where : str or pyarrow.io.NativeFile
    Writable target. For passing Python file objects or byte buffers,
    see pyarrow.io.PythonFileInterface, pyarrow.io.BufferOutputStream
    or pyarrow.io.FixedSizeBufferWriter.
{}
)NN)numbersr   r   pyarrow.libr   pyarrow._orcr
   
pyarrow.fsr   r   ry   rS   r   rO   r   rx   rQ   r   r   <module>r      sA  &                    3 3 3 3 3 3`1 `1 `1 `1 `1 `1 `1 `1F DI! I! I! I! I! I! I! I!X
 
 
 

 0 $,*',%,!&"%.1%)!%! ! ! ! !H F !!    r   