§
    q-Ph*  ã                  ó`   — d dl mZ d dlmZ d dlmZ d dlmZ erd dlm	Z	  G d„ d¦  «        Z
dS )	é    )Úannotations)ÚTYPE_CHECKING)Úqualified_type_name)Ú	wrap_expr)ÚExprc                  óP   — e Zd ZdZdZdd„Zdd„Zdd	„Zdd
„Zdd„Z	dd„Z
ddd„ZdS )ÚExprCatNameSpacez.Namespace for categorical related expressions.ÚcatÚexprr   ÚreturnÚNonec                ó   — |j         | _         d S ©N)Ú_pyexpr)Úselfr   s     úW/var/www/html/test/jupyter/venv/lib/python3.11/site-packages/polars/expr/categorical.pyÚ__init__zExprCatNameSpace.__init__   s   € Ø”|ˆŒˆˆó    c                óN   — t          | j                             ¦   «         ¦  «        S )u  
        Get the categories stored in this data type.

        Examples
        --------
        >>> df = pl.Series(
        ...     "cats", ["foo", "bar", "foo", "foo", "ham"], dtype=pl.Categorical
        ... ).to_frame()
        >>> df.select(pl.col("cats").cat.get_categories())
        shape: (3, 1)
        â”Œâ”€â”€â”€â”€â”€â”€â”
        â”‚ cats â”‚
        â”‚ ---  â”‚
        â”‚ str  â”‚
        â•žâ•â•â•â•â•â•â•¡
        â”‚ foo  â”‚
        â”‚ bar  â”‚
        â”‚ ham  â”‚
        â””â”€â”€â”€â”€â”€â”€â”˜
        )r   r   Úcat_get_categories©r   s    r   Úget_categorieszExprCatNameSpace.get_categories   s    € õ* ˜œ×8Ò8Ñ:Ô:Ñ;Ô;Ð;r   c                óN   — t          | j                             ¦   «         ¦  «        S )uÔ  
        Return the byte-length of the string representation of each value.

        Returns
        -------
        Expr
            Expression of data type :class:`UInt32`.

        See Also
        --------
        len_chars

        Notes
        -----
        When working with non-ASCII text, the length in bytes is not the same as the
        length in characters. You may want to use :func:`len_chars` instead.
        Note that :func:`len_bytes` is much more performant (_O(1)_) than
        :func:`len_chars` (_O(n)_).

        Examples
        --------
        >>> df = pl.DataFrame(
        ...     {"a": pl.Series(["CafÃ©", "345", "æ±äº¬", None], dtype=pl.Categorical)}
        ... )
        >>> df.with_columns(
        ...     pl.col("a").cat.len_bytes().alias("n_bytes"),
        ...     pl.col("a").cat.len_chars().alias("n_chars"),
        ... )
        shape: (4, 3)
        â”Œâ”€â”€â”€â”€â”€â”€â”¬â”€â”€â”€â”€â”€â”€â”€â”€â”€â”¬â”€â”€â”€â”€â”€â”€â”€â”€â”€â”
        â”‚ a    â”† n_bytes â”† n_chars â”‚
        â”‚ ---  â”† ---     â”† ---     â”‚
        â”‚ cat  â”† u32     â”† u32     â”‚
        â•žâ•â•â•â•â•â•â•ªâ•â•â•â•â•â•â•â•â•â•ªâ•â•â•â•â•â•â•â•â•â•¡
        â”‚ CafÃ© â”† 5       â”† 4       â”‚
        â”‚ 345  â”† 3       â”† 3       â”‚
        â”‚ æ±äº¬ â”† 6       â”† 2       â”‚
        â”‚ null â”† null    â”† null    â”‚
        â””â”€â”€â”€â”€â”€â”€â”´â”€â”€â”€â”€â”€â”€â”€â”€â”€â”´â”€â”€â”€â”€â”€â”€â”€â”€â”€â”˜
        )r   r   Úcat_len_bytesr   s    r   Ú	len_byteszExprCatNameSpace.len_bytes+   s!   € õR ˜œ×3Ò3Ñ5Ô5Ñ6Ô6Ð6r   c                óN   — t          | j                             ¦   «         ¦  «        S )uÅ  
        Return the number of characters of the string representation of each value.

        Returns
        -------
        Expr
            Expression of data type :class:`UInt32`.

        See Also
        --------
        len_bytes

        Notes
        -----
        When working with ASCII text, use :func:`len_bytes` instead to achieve
        equivalent output with much better performance:
        :func:`len_bytes` runs in _O(1)_, while :func:`len_chars` runs in (_O(n)_).

        A character is defined as a `Unicode scalar value`_. A single character is
        represented by a single byte when working with ASCII text, and a maximum of
        4 bytes otherwise.

        .. _Unicode scalar value: https://www.unicode.org/glossary/#unicode_scalar_value

        Examples
        --------
        >>> df = pl.DataFrame(
        ...     {"a": pl.Series(["CafÃ©", "345", "æ±äº¬", None], dtype=pl.Categorical)}
        ... )
        >>> df.with_columns(
        ...     pl.col("a").cat.len_chars().alias("n_chars"),
        ...     pl.col("a").cat.len_bytes().alias("n_bytes"),
        ... )
        shape: (4, 3)
        â”Œâ”€â”€â”€â”€â”€â”€â”¬â”€â”€â”€â”€â”€â”€â”€â”€â”€â”¬â”€â”€â”€â”€â”€â”€â”€â”€â”€â”
        â”‚ a    â”† n_chars â”† n_bytes â”‚
        â”‚ ---  â”† ---     â”† ---     â”‚
        â”‚ cat  â”† u32     â”† u32     â”‚
        â•žâ•â•â•â•â•â•â•ªâ•â•â•â•â•â•â•â•â•â•ªâ•â•â•â•â•â•â•â•â•â•¡
        â”‚ CafÃ© â”† 4       â”† 5       â”‚
        â”‚ 345  â”† 3       â”† 3       â”‚
        â”‚ æ±äº¬ â”† 2       â”† 6       â”‚
        â”‚ null â”† null    â”† null    â”‚
        â””â”€â”€â”€â”€â”€â”€â”´â”€â”€â”€â”€â”€â”€â”€â”€â”€â”´â”€â”€â”€â”€â”€â”€â”€â”€â”€â”˜
        )r   r   Úcat_len_charsr   s    r   Ú	len_charszExprCatNameSpace.len_charsV   s!   € õ\ ˜œ×3Ò3Ñ5Ô5Ñ6Ô6Ð6r   ÚprefixÚstrc                ó¼   — t          |t          ¦  «        s!dt          |¦  «        ›}t          |¦  «        ‚t	          | j                             |¦  «        ¦  «        S )uO  
        Check if string representations of values start with a substring.

        Parameters
        ----------
        prefix
            Prefix substring.

        See Also
        --------
        contains : Check if string repr contains a substring that matches a pattern.
        ends_with : Check if string repr end with a substring.

        Notes
        -----
        Whereas `str.starts_with` allows expression inputs, `cat.starts_with` requires
        a literal string value.

        Examples
        --------
        >>> df = pl.DataFrame(
        ...     {"fruits": pl.Series(["apple", "mango", None], dtype=pl.Categorical)}
        ... )
        >>> df.with_columns(
        ...     pl.col("fruits").cat.starts_with("app").alias("has_prefix"),
        ... )
        shape: (3, 2)
        â”Œâ”€â”€â”€â”€â”€â”€â”€â”€â”¬â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”
        â”‚ fruits â”† has_prefix â”‚
        â”‚ ---    â”† ---        â”‚
        â”‚ cat    â”† bool       â”‚
        â•žâ•â•â•â•â•â•â•â•â•ªâ•â•â•â•â•â•â•â•â•â•â•â•â•¡
        â”‚ apple  â”† true       â”‚
        â”‚ mango  â”† false      â”‚
        â”‚ null   â”† null       â”‚
        â””â”€â”€â”€â”€â”€â”€â”€â”€â”´â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”˜

        Using `starts_with` as a filter condition:

        >>> df.filter(pl.col("fruits").cat.starts_with("app"))
        shape: (1, 1)
        â”Œâ”€â”€â”€â”€â”€â”€â”€â”€â”
        â”‚ fruits â”‚
        â”‚ ---    â”‚
        â”‚ cat    â”‚
        â•žâ•â•â•â•â•â•â•â•â•¡
        â”‚ apple  â”‚
        â””â”€â”€â”€â”€â”€â”€â”€â”€â”˜
        z!'prefix' must be a string; found )Ú
isinstancer    r   Ú	TypeErrorr   r   Úcat_starts_with)r   r   Úmsgs      r   Ústarts_withzExprCatNameSpace.starts_with†   sW   € õd ˜&¥#Ñ&Ô&ð 	!ØUÕ6IÈ&Ñ6QÔ6QÐUÐUˆCÝ˜C‘.”.Ð Ý˜œ×5Ò5°fÑ=Ô=Ñ>Ô>Ð>r   Úsuffixc                ó¼   — t          |t          ¦  «        s!dt          |¦  «        ›}t          |¦  «        ‚t	          | j                             |¦  «        ¦  «        S )u(  
        Check if string representations of values end with a substring.

        Parameters
        ----------
        suffix
            Suffix substring.

        See Also
        --------
        contains : Check if string reprs contains a substring that matches a pattern.
        starts_with : Check if string reprs start with a substring.

        Notes
        -----
        Whereas `str.ends_with` allows expression inputs, `cat.ends_with` requires a
        literal string value.

        Examples
        --------
        >>> df = pl.DataFrame(
        ...     {"fruits": pl.Series(["apple", "mango", None], dtype=pl.Categorical)}
        ... )
        >>> df.with_columns(pl.col("fruits").cat.ends_with("go").alias("has_suffix"))
        shape: (3, 2)
        â”Œâ”€â”€â”€â”€â”€â”€â”€â”€â”¬â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”
        â”‚ fruits â”† has_suffix â”‚
        â”‚ ---    â”† ---        â”‚
        â”‚ cat    â”† bool       â”‚
        â•žâ•â•â•â•â•â•â•â•â•ªâ•â•â•â•â•â•â•â•â•â•â•â•â•¡
        â”‚ apple  â”† false      â”‚
        â”‚ mango  â”† true       â”‚
        â”‚ null   â”† null       â”‚
        â””â”€â”€â”€â”€â”€â”€â”€â”€â”´â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”˜

        Using `ends_with` as a filter condition:

        >>> df.filter(pl.col("fruits").cat.ends_with("go"))
        shape: (1, 1)
        â”Œâ”€â”€â”€â”€â”€â”€â”€â”€â”
        â”‚ fruits â”‚
        â”‚ ---    â”‚
        â”‚ cat    â”‚
        â•žâ•â•â•â•â•â•â•â•â•¡
        â”‚ mango  â”‚
        â””â”€â”€â”€â”€â”€â”€â”€â”€â”˜
        z!'suffix' must be a string; found )r"   r    r   r#   r   r   Úcat_ends_with)r   r'   r%   s      r   Ú	ends_withzExprCatNameSpace.ends_with½   sW   € õ` ˜&¥#Ñ&Ô&ð 	!ØUÕ6IÈ&Ñ6QÔ6QÐUÐUˆCÝ˜C‘.”.Ð Ý˜œ×3Ò3°FÑ;Ô;Ñ<Ô<Ð<r   NÚoffsetÚintÚlengthú
int | Nonec                óR   — t          | j                             ||¦  «        ¦  «        S )uN	  
        Extract a substring from the string representation of each value.

        Parameters
        ----------
        offset
            Start index. Negative indexing is supported.
        length
            Length of the slice. If set to `None` (default), the slice is taken to the
            end of the string.

        Returns
        -------
        Expr
            Expression of data type :class:`String`.

        Notes
        -----
        Both the `offset` and `length` inputs are defined in terms of the number
        of characters in the (UTF8) string. A character is defined as a
        `Unicode scalar value`_. A single character is represented by a single byte
        when working with ASCII text, and a maximum of 4 bytes otherwise.

        .. _Unicode scalar value: https://www.unicode.org/glossary/#unicode_scalar_value

        Examples
        --------
        >>> df = pl.DataFrame(
        ...     {
        ...         "s": pl.Series(
        ...             ["pear", None, "papaya", "dragonfruit"],
        ...             dtype=pl.Categorical,
        ...         )
        ...     }
        ... )
        >>> df.with_columns(pl.col("s").cat.slice(-3).alias("slice"))
        shape: (4, 2)
        â”Œâ”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”¬â”€â”€â”€â”€â”€â”€â”€â”
        â”‚ s           â”† slice â”‚
        â”‚ ---         â”† ---   â”‚
        â”‚ cat         â”† str   â”‚
        â•žâ•â•â•â•â•â•â•â•â•â•â•â•â•â•ªâ•â•â•â•â•â•â•â•¡
        â”‚ pear        â”† ear   â”‚
        â”‚ null        â”† null  â”‚
        â”‚ papaya      â”† aya   â”‚
        â”‚ dragonfruit â”† uit   â”‚
        â””â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”´â”€â”€â”€â”€â”€â”€â”€â”˜

        Using the optional `length` parameter

        >>> df.with_columns(pl.col("s").cat.slice(4, length=3).alias("slice"))
        shape: (4, 2)
        â”Œâ”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”¬â”€â”€â”€â”€â”€â”€â”€â”
        â”‚ s           â”† slice â”‚
        â”‚ ---         â”† ---   â”‚
        â”‚ cat         â”† str   â”‚
        â•žâ•â•â•â•â•â•â•â•â•â•â•â•â•â•ªâ•â•â•â•â•â•â•â•¡
        â”‚ pear        â”†       â”‚
        â”‚ null        â”† null  â”‚
        â”‚ papaya      â”† ya    â”‚
        â”‚ dragonfruit â”† onf   â”‚
        â””â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”´â”€â”€â”€â”€â”€â”€â”€â”˜
        )r   r   Ú	cat_slice)r   r+   r-   s      r   ÚslicezExprCatNameSpace.sliceò   s%   € õ@ ˜œ×/Ò/°¸Ñ?Ô?Ñ@Ô@Ð@r   )r   r   r   r   )r   r   )r   r    r   r   )r'   r    r   r   r   )r+   r,   r-   r.   r   r   )Ú__name__Ú
__module__Ú__qualname__Ú__doc__Ú	_accessorr   r   r   r   r&   r*   r1   © r   r   r	   r	      sÂ   € € € € € Ø8Ð8à€Ið$ð $ð $ð $ð<ð <ð <ð <ð.)7ð )7ð )7ð )7ðV.7ð .7ð .7ð .7ð`5?ð 5?ð 5?ð 5?ðn3=ð 3=ð 3=ð 3=ðj@Að @Að @Að @Að @Að @Að @Ar   r	   N)Ú
__future__r   Útypingr   Úpolars._utils.variousr   Úpolars._utils.wrapr   Úpolarsr   r	   r7   r   r   ú<module>r=      s­   ðØ "Ð "Ð "Ð "Ð "Ð "à  Ð  Ð  Ð  Ð  Ð  à 5Ð 5Ð 5Ð 5Ð 5Ð 5Ø (Ð (Ð (Ð (Ð (Ð (àð ØÐÐÐÐÐðfAð fAð fAð fAð fAñ fAô fAð fAð fAð fAr   