
    q-PhH              	      J   d dl mZ d dlZd dlmZ d dlmZ d dlmZmZ d dl	m
Z
 d dlmZ d dlmZmZ d d	lmZ d d
lmZmZ d dlmZ d dlmZ d dlmZ erd dlmZ d dlmZ d dlmZmZm Z  d dl!m"Z" dddddddddd9d'Z#dddddddd(d:d*Z$d+d,d;d0Z%	 	 	 d<d=d3Z&d>d5Z'd?d8Z(dS )@    )annotationsN)datetime)Path)TYPE_CHECKINGAny)urlparse)
from_arrow)NullTime)unpack_dtypes)_DELTALAKE_AVAILABLE	deltalake)scan_parquet)scan_pyarrow_dataset)Schema)Literal
DeltaTable)	DataFrameDataType	LazyFrame)CredentialProviderFunctionautoF)versioncolumnsrechunkstorage_optionscredential_providerdelta_table_optionsuse_pyarrowpyarrow_optionssourcestr | DeltaTabler   int | str | datetime | Noner   list[str] | Noner   bool | Noner   dict[str, Any] | Noner   3CredentialProviderFunction | Literal['auto'] | Noner   r    boolr!   returnr   c          
         t          | |||||||          }	||	                    |          }	|	                                S )a7  
    Reads into a DataFrame from a Delta lake table.

    Parameters
    ----------
    source
        DeltaTable or a Path or URI to the root of the Delta lake table.

        Note: For Local filesystem, absolute and relative paths are supported but
        for the supported object storages - GCS, Azure and S3 full URI must be provided.
    version
        Numerical version or timestamp version of the Delta lake table.

        Note: If `version` is not provided, the latest version of delta lake
        table is read.
    columns
        Columns to select. Accepts a list of column names.
    rechunk
        Make sure that all columns are contiguous in memory by
        aggregating the chunks into a single array.
    storage_options
        Extra options for the storage backends supported by `deltalake`.
        For cloud storages, this may include configurations for authentication etc.

        More info is available `here
        <https://delta-io.github.io/delta-rs/usage/loading-table/>`__.
    credential_provider
        Provide a function that can be called to provide cloud storage
        credentials. The function is expected to return a dictionary of
        credential keys along with an optional credential expiry time.

        .. warning::
            This functionality is considered **unstable**. It may be changed
            at any point without it being considered a breaking change.
    delta_table_options
        Additional keyword arguments while reading a Delta lake Table.
    use_pyarrow
        Flag to enable pyarrow dataset reads.
    pyarrow_options
        Keyword arguments while converting a Delta lake Table to pyarrow table.

    Returns
    -------
    DataFrame

    Examples
    --------
    Reads a Delta table from local filesystem.
    Note: Since version is not provided, the latest version of the delta table is read.

    >>> table_path = "/path/to/delta-table/"
    >>> pl.read_delta(table_path)  # doctest: +SKIP

    Reads a specific version of the Delta table from local filesystem.
    Note: This will fail if the provided version of the delta table does not exist.

    >>> pl.read_delta(table_path, version=1)  # doctest: +SKIP

    Time travel a delta table from local filesystem using a timestamp version.

    >>> pl.read_delta(
    ...     table_path, version=datetime(2020, 1, 1, tzinfo=timezone.utc)
    ... )  # doctest: +SKIP

    Reads a Delta table from AWS S3.
    See a list of supported storage options for S3 `here
    <https://docs.rs/object_store/latest/object_store/aws/enum.AmazonS3ConfigKey.html#variants>`__.

    >>> table_path = "s3://bucket/path/to/delta-table/"
    >>> storage_options = {
    ...     "AWS_ACCESS_KEY_ID": "THE_AWS_ACCESS_KEY_ID",
    ...     "AWS_SECRET_ACCESS_KEY": "THE_AWS_SECRET_ACCESS_KEY",
    ... }
    >>> pl.read_delta(table_path, storage_options=storage_options)  # doctest: +SKIP

    Reads a Delta table from Google Cloud storage (GCS).
    See a list of supported storage options for GCS `here
    <https://docs.rs/object_store/latest/object_store/gcp/enum.GoogleConfigKey.html#variants>`__.

    >>> table_path = "gs://bucket/path/to/delta-table/"
    >>> storage_options = {"SERVICE_ACCOUNT": "SERVICE_ACCOUNT_JSON_ABSOLUTE_PATH"}
    >>> pl.read_delta(table_path, storage_options=storage_options)  # doctest: +SKIP

    Reads a Delta table from Azure.

    Following type of table paths are supported,

    * az://<container>/<path>
    * adl://<container>/<path>
    * abfs://<container>/<path>

    See a list of supported storage options for Azure `here
    <https://docs.rs/object_store/latest/object_store/azure/enum.AzureConfigKey.html#variants>`__.

    >>> table_path = "az://container/path/to/delta-table/"
    >>> storage_options = {
    ...     "AZURE_STORAGE_ACCOUNT_NAME": "AZURE_STORAGE_ACCOUNT_NAME",
    ...     "AZURE_STORAGE_ACCOUNT_KEY": "AZURE_STORAGE_ACCOUNT_KEY",
    ... }
    >>> pl.read_delta(table_path, storage_options=storage_options)  # doctest: +SKIP

    Reads a Delta table with additional delta specific options. In the below example,
    `without_files` option is used which loads the table without file tracking
    information.

    >>> table_path = "/path/to/delta-table/"
    >>> delta_table_options = {"without_files": True}
    >>> pl.read_delta(
    ...     table_path, delta_table_options=delta_table_options
    ... )  # doctest: +SKIP
    )r"   r   r   r   r   r    r!   r   )
scan_deltaselectcollect)
r"   r   r   r   r   r   r   r    r!   dfs
             O/var/www/html/test/jupyter/venv/lib/python3.11/site-packages/polars/io/delta.py
read_deltar1      sX    v 
'//'	
 	
 	
B YYw::<<    )r   r   r   r   r    r!   r   r   c          
        t                       i }ddlm}	 ddlm}
 ddlm} t          | |	          s |
|| |d          }n||dk    rd}t          |          d}~|r!|	                                x}r ||          }t          | |||i |pi |nd|	          }|r |pi } |j        d i |}t          |          S |d
}t          |          ddl}ddlm} ddlm}m}m} |                                }|j        |k    s|j        |k    rd|j         d| d| } ||          |j        dk    rG|j        @h |j                            |          }t1          |          dk    rd| d} ||          |                                                    d          }t7          |j                            g |                    j        }|                                j        }d!d} |||          \  }}|                                 }|j!        "                    d          rd |D             }tG          ||t1          |          dk    r|nddt1          |          dk    |||pd          S )"aY  
    Lazily read from a Delta lake table.

    Parameters
    ----------
    source
        DeltaTable or a Path or URI to the root of the Delta lake table.

        Note: For Local filesystem, absolute and relative paths are supported but
        for the supported object storages - GCS, Azure and S3 full URI must be provided.
    version
        Numerical version or timestamp version of the Delta lake table.

        Note: If `version` is not provided, the latest version of delta lake
        table is read.
    storage_options
        Extra options for the storage backends supported by `deltalake`.
        For cloud storages, this may include configurations for authentication etc.

        More info is available `here
        <https://delta-io.github.io/delta-rs/usage/loading-table/>`__.
    credential_provider
        Provide a function that can be called to provide cloud storage
        credentials. The function is expected to return a dictionary of
        credential keys along with an optional credential expiry time.

        .. warning::
            This functionality is considered **unstable**. It may be changed
            at any point without it being considered a breaking change.
    delta_table_options
        Additional keyword arguments while reading a Delta lake Table.
    use_pyarrow
        Flag to enable pyarrow dataset reads.
    pyarrow_options
        Keyword arguments while converting a Delta lake Table to pyarrow table.
        Use this parameter when filtering on partitioned columns or to read
        from a 'fsspec' supported filesystem.
    rechunk
        Make sure that all columns are contiguous in memory by
        aggregating the chunks into a single array.

    Returns
    -------
    LazyFrame

    Examples
    --------
    Creates a scan for a Delta table from local filesystem.
    Note: Since version is not provided, the latest version of the delta table is read.

    >>> table_path = "/path/to/delta-table/"
    >>> pl.scan_delta(table_path).collect()  # doctest: +SKIP

    Creates a scan for a specific version of the Delta table from local filesystem.
    Note: This will fail if the provided version of the delta table does not exist.

    >>> pl.scan_delta(table_path, version=1).collect()  # doctest: +SKIP

    Time travel a delta table from local filesystem using a timestamp version.

    >>> pl.scan_delta(
    ...     table_path, version=datetime(2020, 1, 1, tzinfo=timezone.utc)
    ... ).collect()  # doctest: +SKIP

    Creates a scan for a Delta table from AWS S3.
    See a list of supported storage options for S3 `here
    <https://docs.rs/object_store/latest/object_store/aws/enum.AmazonS3ConfigKey.html#variants>`__.

    >>> table_path = "s3://bucket/path/to/delta-table/"
    >>> storage_options = {
    ...     "AWS_REGION": "eu-central-1",
    ...     "AWS_ACCESS_KEY_ID": "THE_AWS_ACCESS_KEY_ID",
    ...     "AWS_SECRET_ACCESS_KEY": "THE_AWS_SECRET_ACCESS_KEY",
    ... }
    >>> pl.scan_delta(
    ...     table_path, storage_options=storage_options
    ... ).collect()  # doctest: +SKIP

    Creates a scan for a Delta table from Google Cloud storage (GCS).
    See a list of supported storage options for GCS `here
    <https://docs.rs/object_store/latest/object_store/gcp/enum.GoogleConfigKey.html#variants>`__.

    >>> table_path = "gs://bucket/path/to/delta-table/"
    >>> storage_options = {"SERVICE_ACCOUNT": "SERVICE_ACCOUNT_JSON_ABSOLUTE_PATH"}
    >>> pl.scan_delta(
    ...     table_path, storage_options=storage_options
    ... ).collect()  # doctest: +SKIP

    Creates a scan for a Delta table from Azure.
    Supported options for Azure are available `here
    <https://docs.rs/object_store/latest/object_store/azure/enum.AzureConfigKey.html#variants>`__.

    Following type of table paths are supported,

    * az://<container>/<path>
    * adl://<container>/<path>
    * abfs[s]://<container>/<path>

    >>> table_path = "az://container/path/to/delta-table/"
    >>> storage_options = {
    ...     "AZURE_STORAGE_ACCOUNT_NAME": "AZURE_STORAGE_ACCOUNT_NAME",
    ...     "AZURE_STORAGE_ACCOUNT_KEY": "AZURE_STORAGE_ACCOUNT_KEY",
    ... }
    >>> pl.scan_delta(
    ...     table_path, storage_options=storage_options
    ... ).collect()  # doctest: +SKIP

    Creates a scan for a Delta table with additional delta specific options.
    In the below example, `without_files` option is used which loads the table without
    file tracking information.

    >>> table_path = "/path/to/delta-table/"
    >>> delta_table_options = {"without_files": True}
    >>> pl.scan_delta(
    ...     table_path, delta_table_options=delta_table_options
    ... ).collect()  # doctest: +SKIP
    r   r   )!_init_credential_provider_builder)+_get_credentials_from_provider_expiry_awarer,   Nr   z?cannot use credential_provider when passing a DeltaTable object)
table_pathr   r   r   z7To make use of pyarrow_options, set use_pyarrow to True)DeltaProtocolError)MAX_SUPPORTED_READER_VERSIONNOT_SUPPORTED_READER_VERSIONSUPPORTED_READER_FEATURESz&The table's minimum reader version is z5 but polars delta scanner only supports version 1 or z with these reader features:    z)The table has set these reader features: z= but these are not yet supported by the polars delta scanner.T)as_large_typesschemar   partition_columns	list[str]r*   tuple[Schema, Schema]c                (   t          |          dk    r| t          g           fS g }g }|                                 D ]8\  }}||v r|                    ||f           !|                    ||f           9t          |          t          |          fS )Nr   )lenr   itemsappend)r=   r>   main_schemahive_schemanamedtypes         r0   _split_schemaz!scan_delta.<locals>._split_schema|  s      !!Q&&6"::%%!<<>> 	2 	2KD%(((""D%=1111""D%=1111k""F;$7$777r2   	lakefs://c                :    g | ]}|                     d d          S )rJ   zs3://)replace).0file_uris     r0   
<listcomp>zscan_delta.<locals>.<listcomp>  s(    VVVX%%k7;;VVVr2   F)r=   rF   allow_missing_columnshive_partitioningr   r   r    )r=   r   r>   r?   r*   r@   )$_check_if_delta_availabler   r   ,polars.io.cloud.credential_provider._builderr4   .polars.io.cloud.credential_provider._providersr5   
isinstance
ValueErrorbuild_credential_provider_get_delta_lake_tableto_pyarrow_datasetr   pyarrowdeltalake.exceptionsr7   deltalake.tabler8   r9   r:   protocolmin_reader_versionreader_features
differencerB   r=   
to_pyarrowr	   Tablefrom_pylistmetadatar>   	file_uris	table_uri
startswithr   )r"   r   r   r   r   r    r!   r   credential_provider_credsr   r4   r5   credential_provider_buildermsgproviderdl_tblpa_dspar7   r8   r9   r:   table_protocolmissing_featuresdelta_schemapolars_schemar>   rI   rE   rF   rf   s                                  r0   r,   r,      s    @  "$$$$$$           fj)) +&G&G,'
 '
## 
	(-@F-J-JOoo&*#" 
/IIKKK
 %P$O%
 %
! # *.I.U E%2D*CDD/	 	 	F  +)/R))<<O<<#E***"Goo777777          __&&N),HHH,0LLLZ^5V Z ZC_Z Z XZ Z 	 ! %%%)Q..*6<^;<GG%
 
   1$$ N>N  N  N  NC$$S))) ==??--T-BBLrx33BEEFFMM));8 8 8 8"  -}]<MNNK  ""I "";// WVVIVVV	#&'8#9#9A#=#=KK4"/0014'7 5	 	 	 	r2   T)strictrg   strrt   c                   t          |           }t          |j        dk    r4t          |                                                               |          n|           }|S )N )r   ru   schemer   
expanduserresolve)rg   rt   parsed_resultresolved_uris       r0   _resolve_delta_lake_urir}     s_    Y''M2%% 	Y""$$,,V444 L r2   r6   deltalake.DeltaTablec                   t                       t          | t          j                  r6t	          |du|du|dug          rt          j        dt          d           | S |i }t          |           }t          |t          t          f          st          j        |f||d|}n)t          j        | fd|i|}|                    |           |S )z
    Initialize a Delta lake table for use in read and scan operations.

    Notes
    -----
    Make sure to install deltalake>=0.8.0. Read the documentation
    `here <https://delta-io.github.io/delta-rs/usage/installation/>`_.
    NzWhen supplying a DeltaTable directly, `version`, `storage_options`, and `delta_table_options` are ignored.
                To silence this warning, don't supply those parameters.   )
stacklevel)r   r   r   )rS   rV   r   r   anywarningswarnRuntimeWarningr}   ru   r   load_as_version)r6   r   r   r   r|   rm   s         r0   rY   rY     s,    *i233 t#t+#4/
 
 	 MK	    " *:66LgX// (%
+
 
 "	
 
 %
 
+
 "
 

 	w'''Mr2   Nonec                 6    t           sd} t          |           d S )Nz=deltalake is not installed

Please run: pip install deltalake)r   ModuleNotFoundError)rk   s    r0   rS   rS     s'     'O!#&&&' 'r2   dtypeslist[DataType]c                j    t          |  }t          t          h}||z  x}rd|}t          |          d S )Nz+dataframe contains unsupported data types: )r   r   r
   	TypeError)r   schema_dtypesunsupported_typesoverlaprk   s        r0   _check_for_unsupported_typesr     sN    !6*Mt
  "333w GGGGnn r2   )r"   r#   r   r$   r   r%   r   r&   r   r'   r   r(   r   r'   r    r)   r!   r'   r*   r   )r"   r#   r   r$   r   r'   r   r(   r   r'   r    r)   r!   r'   r   r&   r*   r   )rg   ru   rt   r)   r*   ru   )NNN)
r6   r#   r   r$   r   r'   r   r'   r*   r~   )r*   r   )r   r   r*   r   ))
__future__r   r   r   pathlibr   typingr   r   urllib.parser   polars.convertr	   polars.datatypesr
   r   polars.datatypes.convertr   polars.dependenciesr   r   polars.io.parquetr   #polars.io.pyarrow_dataset.functionsr   polars.schemar   r   r   polarsr   r   r   polars.io.cloudr   r1   r,   r}   rY   rS   r   rR   r2   r0   <module>r      sJ   " " " " " "              % % % % % % % % ! ! ! ! ! ! % % % % % % ' ' ' ' ' ' ' ' 2 2 2 2 2 2 ? ? ? ? ? ? ? ? * * * * * * D D D D D D             ;$$$$$$5555555555:::::: ,0 $-1OU15-1H H H H H H\ ,0-1OU15-1z z z z z zz ?C 	 	 	 	 	 	 ,0-115	1 1 1 1 1h' ' ' '	 	 	 	 	 	r2   