
    z-PhL                       U d dl mZ d dlmZmZ d dlmZmZmZ d dl	Z
d dlZd dlmZ d dlmZ eZeZeZej         e
j                     e
j                     e
j                     e
j                    dej         e
j                     e
j                     e
j                     e
j                    dej         e
j                     e
j                     e
j                     dej!         e
j"                     e
j                    dej#        d	 e
j$                    iiZ%d
e&d<   d2d3dZ'd2d4dZ(	 d2d5dZ)	 d2d6dZ*	 d2d6dZ+	 d2d7dZ,d Z-d Z.	 	 d8d9d)Z/	 	 d8d:d/Z0	 	 d8d;d1Z1dS )<    )annotations)AnyTuple)	DtypeKindColumnBuffersColumnNullTypeN)Dtype)          @   )r   r   r   )   r
   r
   zdict[DtypeKind, dict[int, Any]]_PYARROW_DTYPESTdfDataFrameObjectreturnpa.Tablec                6   t          | t          j                  r| S t          | t          j                  r t          j                            | g          S t          | d          st          d          t          |                     |          |          S )a.  
    Build a ``pa.Table`` from any DataFrame supporting the interchange protocol.

    Parameters
    ----------
    df : DataFrameObject
        Object supporting the interchange protocol, i.e. `__dataframe__`
        method.
    allow_copy : bool, default: True
        Whether to allow copying the memory to perform the conversion
        (if false then zero-copy approach is requested).

    Returns
    -------
    pa.Table

    Examples
    --------
    >>> import pyarrow
    >>> from pyarrow.interchange import from_dataframe

    Convert a pandas dataframe to a pyarrow table:

    >>> import pandas as pd
    >>> df = pd.DataFrame({
    ...         "n_attendees": [100, 10, 1],
    ...         "country": ["Italy", "Spain", "Slovenia"],
    ...     })
    >>> df
       n_attendees   country
    0          100     Italy
    1           10     Spain
    2            1  Slovenia
    >>> from_dataframe(df)
    pyarrow.Table
    n_attendees: int64
    country: large_string
    ----
    n_attendees: [[100,10,1]]
    country: [["Italy","Spain","Slovenia"]]
    __dataframe__z#`df` does not support __dataframe__)
allow_copy)	
isinstancepaTableRecordBatchfrom_batcheshasattr
ValueError_from_dataframer   )r   r   s     b/var/www/html/test/jupyter/venv/lib/python3.11/site-packages/pyarrow/interchange/from_dataframe.pyfrom_dataframer    ?   s    T "bh +		B	'	' +x$$bT***2'' @>???2++z+BB&02 2 2 2    c                   g }|                                  D ]'}t          ||          }|                    |           (|s$t          |           }|                    |           t          j                            |          S )a  
    Build a ``pa.Table`` from the DataFrame interchange object.

    Parameters
    ----------
    df : DataFrameObject
        Object supporting the interchange protocol, i.e. `__dataframe__`
        method.
    allow_copy : bool, default: True
        Whether to allow copying the memory to perform the conversion
        (if false then zero-copy approach is requested).

    Returns
    -------
    pa.Table
    )
get_chunksprotocol_df_chunk_to_pyarrowappendr   r   r   )r   r   batcheschunkbatchs        r   r   r   u   s    " G  ,UJ??u ,R00u8  )))r!   r   boolpa.RecordBatchc                   i }|                                  D ]}t          |t                    st          d| d          ||v rt          d| d          |                     |          }|j        d         }|t          j        t          j        t          j	        t          j
        t          j        fv rt          ||          ||<   |t          j        k    rt          ||          ||<   |t          j        k    rt!          ||          ||<   t#          d| d          t$          j                            |          S )a  
    Convert interchange protocol chunk to ``pa.RecordBatch``.

    Parameters
    ----------
    df : DataFrameObject
        Object supporting the interchange protocol, i.e. `__dataframe__`
        method.
    allow_copy : bool, default: True
        Whether to allow copying the memory to perform the conversion
        (if false then zero-copy approach is requested).

    Returns
    -------
    pa.RecordBatch
    zColumn z is not a stringz is not uniquer   z
Data type z not handled yet)column_namesr   strr   get_column_by_namedtyper   INTUINTFLOATSTRINGDATETIMEcolumn_to_arrayBOOLbool_column_to_arrayCATEGORICAL categorical_column_to_dictionaryNotImplementedErrorr   r   from_pydict)r   r   columnsnamecolr/   s         r   r$   r$      sJ   * $&G!! L L$$$ 	?=t===>>>7??;t;;;<<<##D))	!MNO
 
 
 ,C<<GDMMin$$0jAAGDMMi+++<S*MMGDMM%&J5&J&J&JKKK>%%g...r!   r>   ColumnObjectpa.Arrayc                    |                                  }| j        }t          |||                                 | j        | j        |          }|S )a  
    Convert a column holding one of the primitive dtypes to a PyArrow array.
    A primitive type is one of: int, uint, float, bool (1 bit).

    Parameters
    ----------
    col : ColumnObject
    allow_copy : bool, default: True
        Whether to allow copying the memory to perform the conversion
        (if false then zero-copy approach is requested).

    Returns
    -------
    pa.Array
    )get_buffersr/   buffers_to_arraysizedescribe_nulloffset)r>   r   buffers	data_typedatas        r   r5   r5      sM    & ooG	IGYHHJJ-J&	( (D
 Kr!   c                H   |                                  }|d         d         d         }|dk    r|st          d          | j        }t          |||                                 | j        | j                  }|dk    r&t          j        |t          j
                              }|S )aD  
    Convert a column holding boolean dtype to a PyArrow array.

    Parameters
    ----------
    col : ColumnObject
    allow_copy : bool, default: True
        Whether to allow copying the memory to perform the conversion
        (if false then zero-copy approach is requested).

    Returns
    -------
    pa.Array
    rI   r   r
   zfBoolean column will be casted from uint8 and a copy is required which is forbidden by allow_copy=False)rB   RuntimeErrorr/   rC   rD   rE   rF   pccastr   bool_)r>   r   rG   rD   rH   rI   s         r   r7   r7      s    $ ooG6?1a D qyyyA
 
 	

 	IGYHHJJ-J( (D qyywtRXZZ((Kr!   pa.DictionaryArrayc                n   |st          d          | j        }|d         st          d          |d         }t          |          }|                                 }|d         \  }}t          |||                                 | j        | j                  }t          j
                            ||          }	|	S )aV  
    Convert a column holding categorical data to a pa.DictionaryArray.

    Parameters
    ----------
    col : ColumnObject
    allow_copy : bool, default: True
        Whether to allow copying the memory to perform the conversion
        (if false then zero-copy approach is requested).

    Returns
    -------
    pa.DictionaryArray
    zjCategorical column will be casted from uint8 and a copy is required which is forbidden by allow_copy=Falseis_dictionaryz-Non-dictionary categoricals not supported yet
categoriesrI   )rK   describe_categoricalr:   r5   rB   rC   rD   rE   rF   r   DictionaryArrayfrom_arrays)
r>   r   categorical
cat_column
dictionaryrG   _rH   indices
dict_arrays
             r   r9   r9     s    $  
A
 
 	

 *K' =!;= = 	= \*J ,,J ooG6?LAyw	"xxzz"0"z+ +G #//DDJr!   c                    t          j        d|           }|r9|                    d          |                    d          }}|dk    r|dz  }||fS t          d|            )z4Parse datetime `format_str` to interpret the `data`.zts([smun]):(.*)r      sz DateTime kind is not supported: )rematchgroupr:   )
format_strtimestamp_metaunittzs       r   parse_datetime_format_strrf   5  sw     X0*==N !''**N,@,@,C,Cb3;; CKDRx
MMM
N
NNr!   c                   | \  }}}}|t           j        k    r(t          |          \  }}t          j        ||          S t
                              |i                               |d          }|r|S t          d|  d          )z+Map column date type to pyarrow date type. )re   NzConversion for  is not yet supported.)r   r4   rf   r   	timestampr   getr:   )rH   kind	bit_widthf_stringrY   rd   re   pa_dtypes           r   map_date_typero   G  s    #, D)Xqy!!!,X66b|DR(((("&&tR0044YEE  	EO%C)CCCE E Er!   rG   r   rH   Tuple[DtypeKind, int, str, str]lengthintrE   r   rF   c                   | d         \  }}	 | d         \  }}	n# t           $ r d}Y nw xY w	 | d         \  }
}n# t           $ r d}
Y nw xY wt          j        |j        |j        |          }|rt          ||	||||          }nt          ||||||          }t          |          }|
r|\  }}}}t          j        |
j        |
j        |
          }|d         dk    rt          j                    }n-|dk    rt          j                    }nt          j	                    }t          j
                            |||||g|	          }n%t          j
                            ||||g|	          }|S )
a$  
    Build a PyArrow array from the passed buffer.

    Parameters
    ----------
    buffer : ColumnBuffers
        Dictionary containing tuples of underlying buffers and
        their associated dtype.
    data_type : Tuple[DtypeKind, int, str, str],
        Dtype description of the column as a tuple ``(kind, bit-width, format string,
        endianness)``.
    length : int
        The number of values in the array.
    describe_null: ColumnNullType
        Null representation the column dtype uses,
        as a tuple ``(kind, value)``
    offset : int, default: 0
        Number of elements to offset from the start of the buffer.
    allow_copy : bool, default: True
        Whether to allow copying the memory to perform the conversion
        (if false then zero-copy approach is requested).

    Returns
    -------
    pa.Array

    Notes
    -----
    The returned array doesn't own the memory. The caller of this function
    is responsible for keeping the memory owner object alive as long as
    the returned PyArrow array is being used.
    rI   validityNoffsetsbaser]   Ur   rF   )	TypeErrorr   foreign_bufferptrbufsizevalidity_buffer_from_maskvalidity_buffer_nan_sentinelro   large_stringstringArrayfrom_buffers)rG   rH   rq   rE   rF   r   	data_buffrY   validity_buffvalidity_dtypeoffset_buffoffset_dtypedata_pa_buffervalidity_pa_buff
data_dtypeoffset_bit_widthoffset_pa_bufferstring_typearrays                      r   rC   rC   Y  s   P 6?LIq(/
(;%~~   $+I$6!\\    &y}i6G,57 7 7N  D4]5C5B5;5;5?A A 88A8E8>8>8BD D y))J 
$0!Q ,[_-8-@2=? ? ? Q<3/++KK2%% o// ikk%%/@	 & 
 
 %%~.	 & 
 
 Ls    ((8 AAr   BufferObjectr   r	   	pa.Bufferc                V   |\  }}|\  }}	}	}	|t           j        k    sJ |t          j        k    rdS |t          j        k    s|t          j        k    r|dk    r	t          j        | j        | j	        |           }
|t          j        k    rn|st          d          t          j                            t          j                    |d|
g|          }t          j        |t          j                              }n6t          j                            t          j                    |d|
g|          }|dk    rt          j        |          }|                                d         S |t          j        k    r'|dk    r!t          j        | j        | j	        |           S t'          | d          )a  
    Build a PyArrow buffer from the passed mask buffer.

    Parameters
    ----------
    validity_buff : BufferObject
        Tuple of underlying validity buffer and associated dtype.
    validity_dtype : Dtype
        Dtype description as a tuple ``(kind, bit-width, format string,
        endianness)``.
    describe_null : ColumnNullType
        Null representation the column dtype uses,
        as a tuple ``(kind, value)``
    length : int
        The number of values in the array.
    offset : int, default: 0
        Number of elements to offset from the start of the buffer.
    allow_copy : bool, default: True
        Whether to allow copying the memory to perform the conversion
        (if false then zero-copy approach is requested).

    Returns
    -------
    pa.Buffer
    Nr   rv   YTo create a bitmask a copy of the data is required which is forbidden by allow_copy=Falsery   r   * null representation is not yet supported.)r   r6   r   NON_NULLABLEUSE_BYTEMASKUSE_BITMASKr   r{   r|   r}   rK   r   r   int8rL   rM   rN   invertrG   r:   )r   r   rE   rq   rF   r   	null_kindsentinel_valvalidity_kindrY   buffmask	mask_bools                r   r~   r~     s   B ,I|+M1aIN****N/// t	n1	1	1^///LA4E4E !2!.!6&35 5 5 333 "F   8((F*.06 ) 8 8D bhjj11II--bhjj&/3Tl5; . = =I 1	),,I  ""1%%	n0	0	0\Q5F5F !2!.!6&35 5 5 	5 "HHHJ J 	Jr!   r   c                D   |\  }}}}t          |          }	|\  }
}|
t          j        k    r|st          d          |t          j        k    r|dk    rt          | d|
 d          t          j        	                    |	|d| g|          }t          j        |          }t          j        |          }|                                d         S |
t          j        k    r|st          d          |t          j        k    rt          j                    }n|	}t          j        	                    ||d| g|          }t          j        ||          }t          j        |          }|                                d         S |
t          j        k    rdS t          | d          )	a  
    Build a PyArrow buffer from NaN or sentinel values.

    Parameters
    ----------
    data_pa_buffer : pa.Buffer
        PyArrow buffer for the column data.
    data_type : Dtype
        Dtype description as a tuple ``(kind, bit-width, format string,
        endianness)``.
    describe_null : ColumnNullType
        Null representation the column dtype uses,
        as a tuple ``(kind, value)``
    length : int
        The number of values in the array.
    offset : int, default: 0
        Number of elements to offset from the start of the buffer.
    allow_copy : bool, default: True
        Whether to allow copying the memory to perform the conversion
        (if false then zero-copy approach is requested).

    Returns
    -------
    pa.Buffer
    r   r   z with rh   Nry   r   r   )ro   r   USE_NANrK   r   r2   r:   r   r   r   rL   is_nanr   rG   USE_SENTINELr4   int64equalr   )r   rH   rE   rq   rF   r   rk   rl   rY   r   r   r   pyarrow_datar   sentinel_dtypesentinel_arrr   s                    r   r   r     s   B &D)Qy))J+I| N*** 	B  
 9?""yB &EEIEEEG G G 800~&	 1  L 9\**D9T??D<<>>!$$ 
n1	1	1 	B  
 9%%%XZZNN'Nx,,^-3.2N-C4: - < < xl;;Il++	  ""1%%	n1	1	1!HHHJ J 	Jr!   )T)r   r   r   r   )r   r   )r   r   r   r)   r   r*   )r>   r?   r   r)   r   r@   )r>   r?   r   r)   r   rO   )r   T)rG   r   rH   rp   rq   rr   rE   r   rF   rr   r   r)   r   r@   )r   r   r   r	   rE   r   rq   rr   rF   rr   r   r)   r   r   )r   r   rH   r	   rE   r   rq   rr   rF   rr   r   r)   r   r   )2
__future__r   typingr   r   pyarrow.interchange.columnr   r   r   pyarrowr   r_   pyarrow.computecomputerL   r	   r   r?   r   r0   r   int16int32r   r1   uint8uint16uint32uint64r2   float16float32float64r6   rN   r3   r   r   __annotations__r    r   r$   r5   r7   r9   rf   ro   rC   r~   r    r!   r   <module>r      s  $ # " " " " " "       
              				       , , , , , ,
  Mwrwyy 

 

 

$ $ N

"""& & O*"*,,$"*,,$"*,,( ( N

 

$ $q)")++&4    $32 32 32 32 32l* * * * *> ,/ ,/ ,/ ,/ ,/b     > $ $ $ $ $R - - - - -`O O O$E E E. f f f f f\ LJ LJ LJ LJ LJh UJ UJ UJ UJ UJ UJ UJr!   