
    q-Ph[2                       d dl mZ d dlZd dlmZmZ d dlmZ d dlm	Z	 d dl
mZ d dlmZ d dlmZ d d	lmZ e	r8 ej        e          5  d d
lmZ ddd           n# 1 swxY w Y   d dlmZmZmZ  ej        e          5  d dlmZ ddd           n# 1 swxY w Y    G d d          Z G d d          Z G d d          Zd#dZd$dZ G d de          Zd%dZ  G d d e          Z! G d! d"e          Z"dS )&    )annotationsN)MappingSequence)Path)TYPE_CHECKING)col)PartitioningScheme)issue_unstable_warning)Expr)PyExpr)IOAnyCallable)PyPartitioningc                  B    e Zd ZU dZdd	Zded<   ded<   ded<   dd
ZdS )KeyedPartitiona0  
    A key-value pair for a partition.

    .. warning::
        This functionality is currently considered **unstable**. It may be
        changed at any point without it being considered a breaking change.

    See Also
    --------
    PartitionByKey
    PartitionParted
    KeyedPartitionContext
    namestr	str_value	raw_valuer   returnNonec                0    || _         || _        || _        d S Nr   r   r   )selfr   r   r   s       S/var/www/html/test/jupyter/venv/lib/python3.11/site-packages/polars/io/partition.py__init__zKeyedPartition.__init__&   s    	""    c                $    | j          d| j         S )zGet the `key=value`.=)r   r   )r   s    r   	hive_namezKeyedPartition.hive_name/   s    )..dn...r   N)r   r   r   r   r   r   r   r   )r   r   )__name__
__module____qualname____doc__r   __annotations__r"    r   r   r   r      se          # # # #
 IIINNNNNN/ / / / / /r   r   c                  `    e Zd ZU dZddZded<   ded<   ded<   ded<   d	ed<   d	ed
<   ddZdS )KeyedPartitionContexta*  
    Callback context for a partition creation using keys.

    .. warning::
        This functionality is currently considered **unstable**. It may be
        changed at any point without it being considered a breaking change.

    See Also
    --------
    PartitionByKey
    PartitionParted
    file_idxintpart_idxin_part_idxkeyslist[KeyedPartition]	file_pathr   	full_pathr   r   c                Z    || _         || _        || _        || _        || _        || _        d S r   r+   r-   r.   r/   r1   r2   )r   r+   r-   r.   r/   r1   r2   s          r   r   zKeyedPartitionContext.__init__B   s3     ! &	""r   c                    t          | j                  dk    sJ t          | j        d                                                   }| j        dd         D ]&}|t          |                                          z  }'|S )z$The keys mapped to hive directories.r      N)lenr/   r   r"   )r   pkeys      r   	hive_dirszKeyedPartitionContext.hive_dirs[   st    49~~!!!!1''))**9QRR= 	' 	'Ccmmoo&&&AAr   N)r+   r,   r-   r,   r.   r,   r/   r0   r1   r   r2   r   r   r   )r   r   )r#   r$   r%   r&   r   r'   r:   r(   r   r   r*   r*   4   s          # # # #  MMMMMMOOO        r   r*   c                  :    e Zd ZU dZdd	Zded<   ded<   ded<   d
S )BasePartitionContexta  
    Callback context for a partition creation.

    .. warning::
        This functionality is currently considered **unstable**. It may be
        changed at any point without it being considered a breaking change.

    See Also
    --------
    PartitionMaxSize
    r+   r,   r1   r   r2   r   r   c                0    || _         || _        || _        d S r   r+   r1   r2   )r   r+   r1   r2   s       r   r   zBasePartitionContext.__init__q   s     ""r   N)r+   r,   r1   r   r2   r   r   r   )r#   r$   r%   r&   r   r'   r(   r   r   r<   r<   d   s\         
 
# # # #
 MMMOOO     r   r<   file_path_cbICallable[[BasePartitionContext], Path | str | IO[bytes] | IO[str]] | Noner   c                      d S  fdS )Nc           	          t          | j        t          | j                  t          | j                                      S )Nr>   )r<   r+   r   r1   r2   ctxr?   s    r   <lambda>z)_cast_base_file_path_cb.<locals>.<lambda>   sD    ||\3=))3=))	
 	
 	
  r   r(   r?   s   `r   _cast_base_file_path_cbrG   }   s)     t    r   JCallable[[KeyedPartitionContext], Path | str | IO[bytes] | IO[str]] | Nonec                      d S  fdS )Nc                     t          | j        | j        | j        d | j        D             t          | j                  t          | j                                      S )Nc                P    g | ]#}t          |j        |j        |j                   $S )r   )r   r   r   r   ).0kvs     r   
<listcomp>z>_cast_keyed_file_path_cb.<locals>.<lambda>.<locals>.<listcomp>   sF         BLBL    r   r4   )r*   r+   r-   r.   r/   r   r1   r2   rC   s    r   rE   z*_cast_keyed_file_path_cb.<locals>.<lambda>   sp    ||\\  (	   3=))3=))	
 	
 	
  r   r(   rF   s   `r   _cast_keyed_file_path_cbrO      s)     t    r   c                  *     e Zd ZdZddd fdZ xZS )PartitionMaxSizea  
    Partitioning scheme to write files with a maximum size.

    This partitioning scheme generates files that have a given maximum size. If
    the size reaches the maximum size, it is closed and a new file is opened.

    .. warning::
        This functionality is currently considered **unstable**. It may be
        changed at any point without it being considered a breaking change.

    Parameters
    ----------
    base_path
        The base path for the output files.
    file_path
        A callback to register or modify the output path for each partition
        relative to the `base_path`. The callback provides a
        :class:`polars.io.partition.BasePartitionContext` that contains information
        about the partition.

        If no callback is given, it defaults to `{ctx.file_idx}.{EXT}`.
    max_size : int
        The maximum size in rows of each of the generated files.

    Examples
    --------
    Split a parquet file by over smaller CSV files with 100 000 rows each:

    >>> pl.scan_parquet("/path/to/file.parquet").sink_csv(
    ...     PartitionMax("./out", max_size=100_000),
    ... )  # doctest: +SKIP

    See Also
    --------
    PartitionByKey
    PartitionParted
    polars.io.partition.BasePartitionContext
    N)r1   	base_path
str | Pathr1   r@   max_sizer,   r   r   c                   t          d           t                                          t          j        |t          |          |                     d S )N0partitioning strategies are considered unstable.)rR   r?   rT   )r
   superr   r   new_max_sizerG   )r   rR   r1   rT   	__class__s       r   r   zPartitionMaxSize.__init__   s`     	QRRR'#4Y??!  	
 	
 	
 	
 	
r   )rR   rS   r1   r@   rT   r,   r   r   r#   r$   r%   r&   r   __classcell__rY   s   @r   rQ   rQ      sW        % %X 
 
 
 
 
 
 
 
 
 
 
 
r   rQ   by6str | Expr | Sequence[str | Expr] | Mapping[str, Expr]list[PyExpr]c                t   d	dt          | t                    rt          |           j        g}nt          | t                    r	| j        g}nit          | t
                    rfd| D             }nEt          | t                    rd |                                 D             }nd}t          |          |S )
Ni
str | Exprr   r   c                N    t          | t                    rt          |           S | S r   )
isinstancer   r   )ra   s    r   to_exprz_lower_by.<locals>.to_expr   s$    a 	q66MHr   c                0    g | ]} |          j         S r(   )_pyexpr)rL   ere   s     r   rN   z_lower_by.<locals>.<listcomp>   s$    555Qggajj(555r   c                H    g | ]\  }}|                     |          j         S r(   )aliasrg   )rL   nrh   s      r   rN   z_lower_by.<locals>.<listcomp>   s)    @@@TQaggajj(@@@r   zinvalid `by` type)ra   rb   r   r   )	rd   r   r   rg   r   r   r   items	TypeError)r]   
lowered_bymsgre   s      @r   	_lower_byrp      s        "c 
"ggo&

	B		 j\

	B	!	! 5555"555

	B	 	  @@RXXZZ@@@

!nnr   c                  ,     e Zd ZdZdddd fdZ xZS )PartitionByKeya  
    Partitioning scheme to write files split by the values of keys.

    This partitioning scheme generates an arbitrary amount of files splitting
    the data depending on what the value is of key expressions.

    The amount of files that can be written is not limited. However, when
    writing beyond a certain amount of files, the data for the remaining
    partitions is buffered before writing to the file.

    .. warning::
        This functionality is currently considered **unstable**. It may be
        changed at any point without it being considered a breaking change.

    Parameters
    ----------
    base_path
        The base path for the output files.

        Use the `mkdir` option on the `sink_*` methods to ensure directories in
        the path are created.
    file_path
        A callback to register or modify the output path for each partition
        relative to the `base_path`. The callback provides a
        :class:`polars.io.partition.KeyedPartitionContext` that contains information
        about the partition.

        If no callback is given, it defaults to
        `{ctx.keys.hive_dirs()}/{ctx.in_part_idx}.{EXT}`.
    by
        The expressions to partition by.
    include_key : bool
        Whether to include the key columns in the output files.

    Examples
    --------
    Split into a hive-partitioning style partition:

    >>> (
    ...     pl.DataFrame({"a": [1, 2, 3], "b": [5, 7, 9], "c": ["A", "B", "C"]})
    ...     .lazy()
    ...     .sink_parquet(
    ...         PartitionByKey(
    ...             "./out",
    ...             by=[pl.col.a, pl.col.b],
    ...             include_key=False,
    ...         ),
    ...         mkdir=True,
    ...     )
    ... )  # doctest: +SKIP

    Split a parquet file by a column `year` into CSV files:

    >>> pl.scan_parquet("/path/to/file.parquet").sink_csv(
    ...     PartitionByKey(
    ...         "./out/",
    ...         file_path=lambda ctx: f"year={ctx.keys[0].str_value}.csv",
    ...         by="year",
    ...     ),
    ... )  # doctest: +SKIP

    See Also
    --------
    PartitionMaxSize
    PartitionParted
    polars.io.partition.KeyedPartitionContext
    NTr1   include_keyrR   rS   r1   rH   r]   r^   rt   boolr   r   c                   t          d           t          |          }t                                          t	          j        |t          |          ||                     d S NrV   )rR   r?   r]   rt   r
   rp   rW   r   r   
new_by_keyrO   r   rR   r1   r]   rt   rn   rY   s         r   r   zPartitionByKey.__init__;  n     	QRRRr]]
%#5i@@'	  	
 	
 	
 	
 	
r   
rR   rS   r1   rH   r]   r^   rt   ru   r   r   rZ   r\   s   @r   rr   rr      s\        B BR  
 
 
 
 
 
 
 
 
 
 
 
r   rr   c                  ,     e Zd ZdZdddd fdZ xZS )PartitionParteda  
    Partitioning scheme to split parted dataframes.

    This is a specialized version of :class:`PartitionByKey`. Where as
    :class:`PartitionByKey` accepts data in any order, this scheme expects the input
    data to be pre-grouped or pre-sorted. This scheme suffers a lot less overhead than
    :class:`PartitionByKey`, but may not be always applicable.

    Each new value of the key expressions starts a new partition, therefore repeating
    the same value multiple times may overwrite previous partitions.

    .. warning::
        This functionality is currently considered **unstable**. It may be
        changed at any point without it being considered a breaking change.

    Parameters
    ----------
    base_path
        The base path for the output files.

        Use the `mkdir` option on the `sink_*` methods to ensure directories in
        the path are created.
    file_path
        A callback to register or modify the output path for each partition
        relative to the `base_path`.The callback provides a
        :class:`polars.io.partition.KeyedPartitionContext` that contains information
        about the partition.

        If no callback is given, it defaults to
        `{ctx.keys.hive_dirs()}/{ctx.in_part_idx}.{EXT}`.
    by
        The expressions to partition by.
    include_key : bool
        Whether to include the key columns in the output files.

    Examples
    --------
    Split a parquet file by a column `year` into CSV files:

    >>> pl.scan_parquet("/path/to/file.parquet").sink_csv(
    ...     PartitionParted("./out", by="year"),
    ...     mkdir=True,
    ... )  # doctest: +SKIP

    See Also
    --------
    PartitionMaxSize
    PartitionByKey
    polars.io.partition.KeyedPartitionContext
    NTrs   rR   rS   r1   rH   r]   r^   rt   ru   r   r   c                   t          d           t          |          }t                                          t	          j        |t          |          ||                     d S rw   rx   rz   s         r   r   zPartitionParted.__init__  r{   r   r|   rZ   r\   s   @r   r~   r~   Q  sZ        1 1p  
 
 
 
 
 
 
 
 
 
 
 
r   r~   )r?   r@   r   r@   )r?   rH   r   rH   )r]   r^   r   r_   )#
__future__r   
contextlibcollections.abcr   r   pathlibr   typingr   polarsr   polars._typingr	   polars._utils.unstabler
   polars.exprr   suppressImportErrorpolars.polarsr   r   r   r   r   r   r*   r<   rG   rO   rQ   rp   rr   r~   r(   r   r   <module>r      s   " " " " " "     - - - - - - - -                         - - - - - - 9 9 9 9 9 9       )		[	)	) ) )(((((() ) ) ) ) ) ) ) ) ) ) ) ) ) ) )(((((((((Z%% - -,,,,,,- - - - - - - - - - - - - - -/ / / / / / / /:- - - - - - - -`       2      .7
 7
 7
 7
 7
) 7
 7
 7
t   2X
 X
 X
 X
 X
' X
 X
 X
vG
 G
 G
 G
 G
( G
 G
 G
 G
 G
s$   	AA #A BBB