
    y-Ph&                         d dl Z d dlmZ d dlmZmZmZmZ d dlmZ	 d dl
mZ d dlmZ  G d d          Zd Zh d	Z	 	 ddZ	 	 ddZddZdS )    N)_pandas_api)CodecTableconcat_tablesschema)_feather)FeatherErrorc                   0    e Zd ZdZddZd	dZd Zd
dZdS )FeatherDataseta  
    Encapsulates details of reading a list of Feather files.

    Parameters
    ----------
    path_or_paths : List[str]
        A list of file names
    validate_schema : bool, default True
        Check that individual file schemas are all the same / compatible
    Tc                 "    || _         || _        d S N)pathsvalidate_schema)selfpath_or_pathsr   s      O/var/www/html/test/jupyter/venv/lib/python3.11/site-packages/pyarrow/feather.py__init__zFeatherDataset.__init__)   s    "
.    Nc                 >   t          | j        d         |          }|g| _        |j        | _        | j        dd         D ]J}t          ||          }| j        r|                     ||           | j                            |           Kt          | j                  S )a,  
        Read multiple feather files as a single pyarrow.Table

        Parameters
        ----------
        columns : List[str]
            Names of columns to read from the file

        Returns
        -------
        pyarrow.Table
            Content of the file as a table (of columns)
        r   columns   N)
read_tabler   _tablesr   r   validate_schemasappendr   )r   r   _filpathtables        r   r   zFeatherDataset.read_table-   s     $*Q-999vkJqrrN 	' 	'DtW555E# 3%%dE222L&&&&T\***r   c                     | j                             |j                   s.t          d                    || j         |j                             d S )Nz-Schema in {!s} was different. 
{!s}

vs

{!s})r   equals
ValueErrorformat)r   piecer   s      r   r   zFeatherDataset.validate_schemasF   sW    {!!%,// 	4 2$fUDK%*\3 34 4 4	4 	4r   c                 V    |                      |                              |          S )a  
        Read multiple Parquet files as a single pandas DataFrame

        Parameters
        ----------
        columns : List[str]
            Names of columns to read from the file
        use_threads : bool, default True
            Use multiple threads when converting to pandas

        Returns
        -------
        pandas.DataFrame
            Content of the file as a pandas DataFrame (of columns)
        r   )use_threadsr   	to_pandas)r   r   r&   s      r   read_pandaszFeatherDataset.read_pandasM   s2      w//99# : % % 	%r   )Tr   )NT)__name__
__module____qualname____doc__r   r   r   r)    r   r   r   r      si        	 	/ / / /+ + + +24 4 4% % % % % %r   r   c                 $   |j         dk    rd S |j        t          j                    t          j                    fv r"t          d                    |                     t          d                    | t          |j                                      )Nr   zqColumn '{}' exceeds 2GB maximum capacity of a Feather binary column. This restriction may be lifted in the futurezkColumn '{}' of type {} was chunked on conversion to Arrow and cannot be currently written to Feather format)
num_chunkstypeextbinarystringr"   r#   str)namecols     r   check_chunked_overflowr8   a   s    
~
xCJLL#*,,/// 006t> > 	>
  **0&s38}}*E*EG G 	Gr   >   lz4zstduncompressed   c                    t           j        r?t           j        r3t          | t           j        j                  r|                                 } t          j        |           ru|dk    rd}n|dk    rd}nt          d          t          j
        | |          }|dk    r7t          |j        j                  D ]\  }}	||         }
t          |	|
           n| }|dk    rit          |j                  t          t#          |j                            k    rt          d          |t          d          |t          d	          nL|t%          j        d
          rd}n3|1|t(          vr(t          d                    |t(                              	 t-          j        ||||||           dS # t0          $ rB t          |t2                    r+	 t5          j        |           n# t4          j        $ r Y nw xY w w xY w)a  
    Write a pandas.DataFrame to Feather format.

    Parameters
    ----------
    df : pandas.DataFrame or pyarrow.Table
        Data to write out as Feather format.
    dest : str
        Local destination path.
    compression : string, default None
        Can be one of {"zstd", "lz4", "uncompressed"}. The default of None uses
        LZ4 for V2 files if it is available, otherwise uncompressed.
    compression_level : int, default None
        Use a compression level particular to the chosen compressor. If None
        use the default compression level
    chunksize : int, default None
        For V2 files, the internal maximum size of Arrow RecordBatch chunks
        when writing the Arrow IPC file format. None means use the default,
        which is currently 64K
    version : int, default 2
        Feather file version. Version 2 is the current. Version 1 is the more
        limited legacy format
    r   Fr<   Nz%Version value should either be 1 or 2)preserve_indexz'cannot serialize duplicate column namesz2Feather V1 files do not support compression optionz0Feather V1 files do not support chunksize option	lz4_framer9   z1compression="{}" not supported, must be one of {})compressioncompression_level	chunksizeversion)r   have_pandas
has_sparse
isinstancepdSparseDataFrameto_denseis_data_framer"   r   from_pandas	enumerater   namesr8   lencolumn_namessetr   is_available_FEATHER_SUPPORTED_CODECSr#   r   write_feather	Exceptionr5   osremoveerror)dfdestr@   rA   rB   rC   r>   r   ir6   r7   s              r   rS   rS   s   sj   2  " 	2{~=>>	B $$  a<<"NN\\!NNDEEE!"^DDDa<<$U\%788 2 24Ah&tS1111!||u!""SU-?)@)@%A%AAAFGGG" & ' ' '   & ' ' ' ! 5#5k#B#BKK%!::: ))/0I*K *KL L L
ud1B)2G	E 	E 	E 	E 	E 	E    dC   		$8   s0   F/ /G;G$#G;$G63G;5G66G;TFc                 B     t          | |||          j        dd|i|S )a  
    Read a pandas.DataFrame from Feather format. To read as pyarrow.Table use
    feather.read_table.

    Parameters
    ----------
    source : str file path, or file-like object
        You can use MemoryMappedFile as source, for explicitly use memory map.
    columns : sequence, optional
        Only read a specific set of columns. If not provided, all columns are
        read.
    use_threads : bool, default True
        Whether to parallelize reading using multiple threads. If false the
        restriction is used in the conversion to Pandas as well as in the
        reading from Feather format.
    memory_map : boolean, default False
        Use memory mapping when opening file on disk, when source is a str.
    **kwargs
        Additional keyword arguments passed on to `pyarrow.Table.to_pandas`.

    Returns
    -------
    df : pandas.DataFrame
        The contents of the Feather file as a pandas.DataFrame
    )r   
memory_mapr&   r&   r.   r'   )sourcer   r&   r\   kwargss        r   read_featherr_      sO    6+JJ! ! !!*N N7BNFLN N Or   c                 *   t          j        | ||          }||                                S d |D             }t          t	          d |                    r|                    |          }nct          t	          d |                    r|                    |          }n/d |D             }t          d                    ||                    |j	        dk     r|S t          t          |                    |k    r|S |                    |          S )	a  
    Read a pyarrow.Table from Feather format

    Parameters
    ----------
    source : str file path, or file-like object
        You can use MemoryMappedFile as source, for explicitly use memory map.
    columns : sequence, optional
        Only read a specific set of columns. If not provided, all columns are
        read.
    memory_map : boolean, default False
        Use memory mapping when opening file on disk, when source is a str
    use_threads : bool, default True
        Whether to parallelize reading using multiple threads.

    Returns
    -------
    table : pyarrow.Table
        The contents of the Feather file as a pyarrow.Table
    )use_memory_mapr&   Nc                 ,    g | ]}t          |          S r.   )r1   ).0columns     r   
<listcomp>zread_table.<locals>.<listcomp>  s    777VDLL777r   c                     | t           k    S r   )intts    r   <lambda>zread_table.<locals>.<lambda>  s
    c r   c                     | t           k    S r   )r5   rh   s    r   rj   zread_table.<locals>.<lambda>  s
    18 r   c                     g | ]	}|j         
S r.   )r*   )rc   ri   s     r   re   zread_table.<locals>.<listcomp>  s    >>>AQZ>>>r   z<Columns must be indices or names. Got columns {} of types {}   )r   FeatherReaderreadallmapread_indices
read_names	TypeErrorr#   rC   sortedrP   select)r]   r   r\   r&   readercolumn_typesr   column_type_namess           r   r   r      s4   * #z{D D DF {{}}77w777L
3!!<0011 =##G,,	S##\22	3	3 =!!'**>>>>> 5):;;= = 	=
 ~	G			(	( ||G$$$r   )NNNr<   )NTF)NFT)rU   pyarrow.pandas_compatr   pyarrow.libr   r   r   r   libr2   pyarrowr   pyarrow._featherr	   r   r8   rR   rS   r_   r   r.   r   r   <module>r      sG  & 
			 - - - - - -0 0 0 0 0 0 0 0 0 0 0 0             ) ) ) ) ) )A% A% A% A% A% A% A% A%HG G G <;;  AE*+Q Q Q Qh 48!O O O O@.% .% .% .% .% .%r   