
    z-Ph                     X    d dl mZ d dlmZmZmZmZ d dlZd dl	m
Z
  G d d          ZdS )    )annotations)AnyIterableOptionalSequenceN)_PyArrowColumnc                      e Zd ZdZ	 	 d(d)dZ	 d(d*dZed+d            Zd,dZd,dZ	d,dZ
d-dZd.dZd/dZd0dZd1dZd2d"Z	 d3d4d'Zd#S )5_PyArrowDataFramea2  
    A data frame class, with only the methods required by the interchange
    protocol defined.

    A "data frame" represents an ordered collection of named columns.
    A column's "name" must be a unique string.
    Columns may be accessed by name or by position.

    This could be a public data frame class, or an object with the methods and
    attributes defined on this DataFrame class could be returned from the
    ``__dataframe__`` method of a public data frame class in a library adhering
    to the dataframe interchange protocol specification.
    FTdfpa.Table | pa.RecordBatchnan_as_nullbool
allow_copyreturnNonec                V    || _         |du rt          d          || _        || _        dS )z
        Constructor - an instance of this (private) class is returned from
        `pa.Table.__dataframe__` or `pa.RecordBatch.__dataframe__`.
        TzKnan_as_null=True currently has no effect, use the default nan_as_null=FalseN)_dfRuntimeError_nan_as_null_allow_copy)selfr   r   r   s       ]/var/www/html/test/jupyter/venv/lib/python3.11/site-packages/pyarrow/interchange/dataframe.py__init__z_PyArrowDataFrame.__init__.   sG      $4   (%    c                .    t          | j        ||          S )a  
        Construct a new exchange object, potentially changing the parameters.
        ``nan_as_null`` is a keyword intended for the consumer to tell the
        producer to overwrite null values in the data with ``NaN``.
        It is intended for cases where the consumer does not support the bit
        mask or byte mask that is the producer's native representation.
        ``allow_copy`` is a keyword that defines whether or not the library is
        allowed to make a copy of the data. For example, copying data would be
        necessary if a library supports strided buffers, given that this
        protocol specifies contiguous buffers.
        )r
   r   )r   r   r   s      r   __dataframe__z_PyArrowDataFrame.__dataframe__C   s     !;
CCCr   dict[str, Any]c                    | j         j        j        r/d | j         j        j                                        D             }|S i S )a!  
        The metadata for the data frame, as a dictionary with string keys. The
        contents of `metadata` may be anything, they are meant for a library
        to store information that it needs to, e.g., roundtrip losslessly or
        for two implementations to share data that is not (yet) part of the
        interchange protocol specification. For avoiding collisions with other
        entries, please add name the keys with the name of the library
        followed by a period and the desired name, e.g, ``pandas.indexcol``.
        c                l    i | ]1\  }}d |                     d          z   |                     d          2S )zpyarrow.utf8)decode).0kvs      r   
<dictcomp>z._PyArrowDataFrame.metadata.<locals>.<dictcomp>a   sQ     M M M#'1a  *AHHV,<,<<ahhv>N>N M M Mr   )r   schemametadataitems)r   schema_metadatas     r   r'   z_PyArrowDataFrame.metadataS   sT     8?# 	M M+/8?+C+I+I+K+KM M MO""Ir   intc                    | j         j        S )z@
        Return the number of columns in the DataFrame.
        )r   num_columnsr   s    r   r,   z_PyArrowDataFrame.num_columnsg   s     x##r   c                    | j         j        S )zK
        Return the number of rows in the DataFrame, if available.
        )r   num_rowsr-   s    r   r/   z_PyArrowDataFrame.num_rowsm   s     x  r   c                    t          | j        t          j                  rdS | j                                        }t          |          S )zH
        Return the number of chunks the DataFrame consists of.
           )
isinstancer   paRecordBatch
to_batcheslen)r   batchess     r   
num_chunksz_PyArrowDataFrame.num_chunkss   s@     dh// 	 1 h))++Gw<<r   Iterable[str]c                $    | j         j        j        S )z?
        Return an iterator yielding the column names.
        )r   r&   namesr-   s    r   column_namesz_PyArrowDataFrame.column_names   s     x$$r   ir   c                ^    t          | j                            |          | j                  S )z>
        Return the column at the indicated position.
        r   r   r   columnr   )r   r=   s     r   
get_columnz_PyArrowDataFrame.get_column   s1     dhooa00)-)9; ; ; 	;r   namestrc                ^    t          | j                            |          | j                  S )zE
        Return the column whose name is the indicated name.
        r?   r@   )r   rC   s     r   get_column_by_namez$_PyArrowDataFrame.get_column_by_name   s1     dhood33)-)9; ; ; 	;r   Iterable[_PyArrowColumn]c                4      fd j         j        D             S )z:
        Return an iterator yielding the columns.
        c                <    g | ]}t          |j                   S )r?   )r   r   )r"   colr   s     r   
<listcomp>z1_PyArrowDataFrame.get_columns.<locals>.<listcomp>   s9     
 
 
 34+;<<<
 
 
r   )r   columnsr-   s   `r   get_columnsz_PyArrowDataFrame.get_columns   s3    
 
 
 
x'
 
 
 	
r   indicesSequence[int]c                    t          | j                            t          |                    | j        | j                  S )zS
        Create a new DataFrame by selecting a subset of columns by index.
        r
   r   selectlistr   r   )r   rN   s     r   select_columnsz _PyArrowDataFrame.select_columns   s7     !HOODMM**D,=t?O
 
 	
r   r;   Sequence[str]c                    t          | j                            t          |                    | j        | j                  S )zR
        Create a new DataFrame by selecting a subset of columns by name.
        rQ   )r   r;   s     r   select_columns_by_namez(_PyArrowDataFrame.select_columns_by_name   s7     !HOODKK(($*;T=M
 
 	
r   Nn_chunksOptional[int]Iterable[_PyArrowDataFrame]c                    |r|dk    r                                  |z  }                                  |z  dk    r|dz  }t           j        t          j                  r j                            |          }nGg }t          d||z  |          D ]0}|                     j                            ||                     1t          |          |dz
  k    r4|                    t          j
        g g j        j                             nAt           j        t          j                  r j                                        }n j        g} fd|D             }|S )a  
        Return an iterator yielding the chunks.

        By default (None), yields the chunks that the data is stored as by the
        producer. If given, ``n_chunks`` must be a multiple of
        ``self.num_chunks()``, meaning the producer must subdivide each chunk
        before yielding it.

        Note that the producer must ensure that all columns are chunked the
        same way.
        r1   r   )max_chunksize)r&   c                F    g | ]}t          |j        j                  S  )r
   r   r   )r"   batchr   s     r   rK   z0_PyArrowDataFrame.get_chunks.<locals>.<listcomp>   sC     * * *  &e&*&7&*&68 8 * * *r   )r/   r2   r   r3   Tabler5   rangeappendslicer6   record_batchr&   )r   rX   
chunk_sizer7   startiterators   `     r   
get_chunksz_PyArrowDataFrame.get_chunks   sk     	%1H4J}})Q..a
$(BH-- F(--J-GG"1j8&;ZHH F FENN48>>%#D#DEEEE 7||x!|++rtDHOLLLMMM $(BH-- %(--//8** * * * ")* * * r   )FT)r   r   r   r   r   r   r   r   )r   r   r   r   r   r
   )r   r   )r   r*   )r   r9   )r=   r*   r   r   )rC   rD   r   r   )r   rG   )rN   rO   r   r
   )r;   rU   r   r
   )N)rX   rY   r   rZ   )__name__
__module____qualname____doc__r   r   propertyr'   r,   r/   r8   r<   rB   rF   rM   rT   rW   rh   r^   r   r   r
   r
      sU          "& & & & &, =AD D D D D     X&$ $ $ $! ! ! !       % % % %; ; ; ;; ; ; ;
 
 
 

 
 
 

 
 
 
 )-) ) ) ) ) ) )r   r
   )
__future__r   typingr   r   r   r   pyarrowr3   pyarrow.interchange.columnr   r
   r^   r   r   <module>rr      s   $ # " " " " "                5 5 5 5 5 5z z z z z z z z z zr   