from __future__ import annotations

from typing import TYPE_CHECKING

from polars._utils.parse import parse_into_expression
from polars._utils.various import scale_bytes
from polars._utils.wrap import wrap_expr
from polars.datatypes import parse_into_dtype

if TYPE_CHECKING:
    from polars import Expr
    from polars._typing import (
        Endianness,
        IntoExpr,
        PolarsDataType,
        SizeUnit,
        TransferEncoding,
    )


class ExprBinaryNameSpace:
    """Namespace for bin related expressions."""

    _accessor = "bin"

    def __init__(self, expr: Expr) -> None:
        # Only the underlying expression handle is kept; every method below
        # derives a new handle from it and wraps the result via `wrap_expr`.
        self._pyexpr = expr._pyexpr

    def contains(self, literal: IntoExpr) -> Expr:
        r"""
        Check if binary values contain a binary substring.

        Parameters
        ----------
        literal
            The binary substring to look for. Either a literal value or an
            expression, as shown in the examples.

        Returns
        -------
        Expr
            Expression of data type :class:`Boolean`.

        See Also
        --------
        starts_with : Check if the binary substring exists at the start
        ends_with : Check if the binary substring exists at the end

        Examples
        --------
        >>> colors = pl.DataFrame(
        ...     {
        ...         "name": ["black", "yellow", "blue"],
        ...         "code": [b"\x00\x00\x00", b"\xff\xff\x00", b"\x00\x00\xff"],
        ...         "lit": [b"\x00", b"\xff\x00", b"\xff\xff"],
        ...     }
        ... )
        >>> colors.select(
        ...     "name",
        ...     pl.col("code").bin.contains(b"\xff").alias("contains_with_lit"),
        ...     pl.col("code").bin.contains(pl.col("lit")).alias("contains_with_expr"),
        ... )
        shape: (3, 3)
        ┌────────┬───────────────────┬────────────────────┐
        │ name   ┆ contains_with_lit ┆ contains_with_expr │
        │ ---    ┆ ---               ┆ ---                │
        │ str    ┆ bool              ┆ bool               │
        ╞════════╪═══════════════════╪════════════════════╡
        │ black  ┆ false             ┆ true               │
        │ yellow ┆ true              ┆ true               │
        │ blue   ┆ true              ┆ false              │
        └────────┴───────────────────┴────────────────────┘
        """
        # NOTE(review): `str_as_lit=True` presumably makes a plain `str` a
        # literal value rather than a column name - confirm against
        # `parse_into_expression`.
        literal_pyexpr = parse_into_expression(literal, str_as_lit=True)
        return wrap_expr(self._pyexpr.bin_contains(literal_pyexpr))

    def ends_with(self, suffix: IntoExpr) -> Expr:
        r"""
        Check if values end with a binary substring.

        Parameters
        ----------
        suffix
            Suffix substring. Either a literal value or an expression, as
            shown in the examples.

        Returns
        -------
        Expr
            Expression of data type :class:`Boolean`.

        See Also
        --------
        starts_with : Check if the binary substring exists at the start
        contains : Check if the binary substring exists anywhere

        Examples
        --------
        >>> colors = pl.DataFrame(
        ...     {
        ...         "name": ["black", "yellow", "blue"],
        ...         "code": [b"\x00\x00\x00", b"\xff\xff\x00", b"\x00\x00\xff"],
        ...         "suffix": [b"\x00", b"\xff\x00", b"\x00\x00"],
        ...     }
        ... )
        >>> colors.select(
        ...     "name",
        ...     pl.col("code").bin.ends_with(b"\xff").alias("ends_with_lit"),
        ...     pl.col("code").bin.ends_with(pl.col("suffix")).alias("ends_with_expr"),
        ... )
        shape: (3, 3)
        ┌────────┬───────────────┬────────────────┐
        │ name   ┆ ends_with_lit ┆ ends_with_expr │
        │ ---    ┆ ---           ┆ ---            │
        │ str    ┆ bool          ┆ bool           │
        ╞════════╪═══════════════╪════════════════╡
        │ black  ┆ false         ┆ true           │
        │ yellow ┆ false         ┆ true           │
        │ blue   ┆ true          ┆ false          │
        └────────┴───────────────┴────────────────┘
        """
        # The argument is parsed the same way as in `contains`.
        suffix_pyexpr = parse_into_expression(suffix, str_as_lit=True)
        return wrap_expr(self._pyexpr.bin_ends_with(suffix_pyexpr))

    def starts_with(self, prefix: IntoExpr) -> Expr:
        r"""
        Check if values start with a binary substring.

        Parameters
        ----------
        prefix
            Prefix substring. Either a literal value or an expression, as
            shown in the examples.

        Returns
        -------
        Expr
            Expression of data type :class:`Boolean`.

        See Also
        --------
        ends_with : Check if the binary substring exists at the end
        contains : Check if the binary substring exists anywhere

        Examples
        --------
        >>> colors = pl.DataFrame(
        ...     {
        ...         "name": ["black", "yellow", "blue"],
        ...         "code": [b"\x00\x00\x00", b"\xff\xff\x00", b"\x00\x00\xff"],
        ...         "prefix": [b"\x00", b"\xff\x00", b"\x00\x00"],
        ...     }
        ... )
        >>> colors.select(
        ...     "name",
        ...     pl.col("code").bin.starts_with(b"\xff").alias("starts_with_lit"),
        ...     pl.col("code")
        ...     .bin.starts_with(pl.col("prefix"))
        ...     .alias("starts_with_expr"),
        ... )
        shape: (3, 3)
        ┌────────┬─────────────────┬──────────────────┐
        │ name   ┆ starts_with_lit ┆ starts_with_expr │
        │ ---    ┆ ---             ┆ ---              │
        │ str    ┆ bool            ┆ bool             │
        ╞════════╪═════════════════╪══════════════════╡
        │ black  ┆ false           ┆ true             │
        │ yellow ┆ true            ┆ false            │
        │ blue   ┆ false           ┆ true             │
        └────────┴─────────────────┴──────────────────┘
        """
        # The argument is parsed the same way as in `contains`.
        prefix_pyexpr = parse_into_expression(prefix, str_as_lit=True)
        return wrap_expr(self._pyexpr.bin_starts_with(prefix_pyexpr))

    def decode(self, encoding: TransferEncoding, *, strict: bool = True) -> Expr:
        r"""
        Decode values using the provided encoding.

        Parameters
        ----------
        encoding : {'hex', 'base64'}
            The encoding to use.
        strict
            Raise an error if the underlying value cannot be decoded,
            otherwise mask out with a null value.

        Returns
        -------
        Expr
            Expression of data type :class:`Binary`.

        Raises
        ------
        ValueError
            If `encoding` is neither `'hex'` nor `'base64'`.

        Examples
        --------
        >>> colors = pl.DataFrame(
        ...     {
        ...         "name": ["black", "yellow", "blue"],
        ...         "encoded": [b"000000", b"ffff00", b"0000ff"],
        ...     }
        ... )
        >>> colors.with_columns(
        ...     pl.col("encoded").bin.decode("hex").alias("code"),
        ... )
        shape: (3, 3)
        ┌────────┬───────────┬─────────────────┐
        │ name   ┆ encoded   ┆ code            │
        │ ---    ┆ ---       ┆ ---             │
        │ str    ┆ binary    ┆ binary          │
        ╞════════╪═══════════╪═════════════════╡
        │ black  ┆ b"000000" ┆ b"\x00\x00\x00" │
        │ yellow ┆ b"ffff00" ┆ b"\xff\xff\x00" │
        │ blue   ┆ b"0000ff" ┆ b"\x00\x00\xff" │
        └────────┴───────────┴─────────────────┘
        """
        if encoding == "hex":
            return wrap_expr(self._pyexpr.bin_hex_decode(strict))
        if encoding == "base64":
            return wrap_expr(self._pyexpr.bin_base64_decode(strict))

        # Unknown encodings are rejected here, before any expression is built.
        msg = f"`encoding` must be one of {{'hex', 'base64'}}, got {encoding!r}"
        raise ValueError(msg)

    def encode(self, encoding: TransferEncoding) -> Expr:
        r"""
        Encode a value using the provided encoding.

        Parameters
        ----------
        encoding : {'hex', 'base64'}
            The encoding to use.

        Returns
        -------
        Expr
            Expression of data type :class:`String`.

        Raises
        ------
        ValueError
            If `encoding` is neither `'hex'` nor `'base64'`.

        Examples
        --------
        >>> colors = pl.DataFrame(
        ...     {
        ...         "color": ["black", "yellow", "blue"],
        ...         "code": [b"\x00\x00\x00", b"\xff\xff\x00", b"\x00\x00\xff"],
        ...     }
        ... )
        >>> colors.with_columns(
        ...     pl.col("code").bin.encode("hex").alias("encoded"),
        ... )
        shape: (3, 3)
        ┌────────┬─────────────────┬─────────┐
        │ color  ┆ code            ┆ encoded │
        │ ---    ┆ ---             ┆ ---     │
        │ str    ┆ binary          ┆ str     │
        ╞════════╪═════════════════╪═════════╡
        │ black  ┆ b"\x00\x00\x00" ┆ 000000  │
        │ yellow ┆ b"\xff\xff\x00" ┆ ffff00  │
        │ blue   ┆ b"\x00\x00\xff" ┆ 0000ff  │
        └────────┴─────────────────┴─────────┘
        """
        if encoding == "hex":
            return wrap_expr(self._pyexpr.bin_hex_encode())
        if encoding == "base64":
            return wrap_expr(self._pyexpr.bin_base64_encode())

        # Unknown encodings are rejected here, before any expression is built.
        msg = f"`encoding` must be one of {{'hex', 'base64'}}, got {encoding!r}"
        raise ValueError(msg)

    def size(self, unit: SizeUnit = "b") -> Expr:
        r"""
        Get the size of binary values in the given unit.

        Parameters
        ----------
        unit : {'b', 'kb', 'mb', 'gb', 'tb'}
            Scale the returned size to the given unit.

        Returns
        -------
        Expr
            Expression of data type :class:`UInt32` or `Float64`.

        Examples
        --------
        >>> from os import urandom
        >>> df = pl.DataFrame({"data": [urandom(n) for n in (512, 256, 1024)]})
        >>> df.with_columns(  # doctest: +IGNORE_RESULT
        ...     n_bytes=pl.col("data").bin.size(),
        ...     n_kilobytes=pl.col("data").bin.size("kb"),
        ... )
        shape: (3, 3)
        ┌─────────────────────────────────┬─────────┬─────────────┐
        │ data                            ┆ n_bytes ┆ n_kilobytes │
        │ ---                             ┆ ---     ┆ ---         │
        │ binary                          ┆ u32     ┆ f64         │
        ╞═════════════════════════════════╪═════════╪═════════════╡
        │ b"y?~B\x83\xf4V\x07\xd3\xfb\xb… ┆ 512     ┆ 0.5         │
        │ b"\xee$4@f\xc14\x07\x8e\x88\x1… ┆ 256     ┆ 0.25        │
        │ b"\x80\xbd\xb9nEq;2\x99$\xf9\x… ┆ 1024    ┆ 1.0         │
        └─────────────────────────────────┴─────────┴─────────────┘
        """
        # The size is first obtained in bytes, then handed to `scale_bytes`
        # together with the requested unit.
        size_in_bytes = wrap_expr(self._pyexpr.bin_size_bytes())
        return scale_bytes(size_in_bytes, unit)

    def reinterpret(
        self, *, dtype: PolarsDataType, endianness: Endianness = "little"
    ) -> Expr:
        r"""
        Interpret a buffer as a numerical Polars type.

        Parameters
        ----------
        dtype : PolarsDataType
            Which type to interpret binary column into.
        endianness : {"big", "little"}, optional
            Which endianness to use when interpreting bytes, by default "little".

        Returns
        -------
        Expr
            Expression of data type `dtype`.
            Note that if binary array is too short value will be null.
            If binary array is too long, remainder will be ignored.

        Examples
        --------
        >>> df = pl.DataFrame({"data": [b"\x05\x00\x00\x00", b"\x10\x00\x01\x00"]})
        >>> df.with_columns(  # doctest: +IGNORE_RESULT
        ...     bin2int=pl.col("data").bin.reinterpret(
        ...         dtype=pl.Int32, endianness="little"
        ...     ),
        ... )
        shape: (2, 2)
        ┌─────────────────────┬─────────┐
        │ data                ┆ bin2int │
        │ ---                 ┆ ---     │
        │ binary              ┆ i32     │
        ╞═════════════════════╪═════════╡
        │ b"\x05\x00\x00\x00" ┆ 5       │
        │ b"\x10\x00\x01\x00" ┆ 65552   │
        └─────────────────────┴─────────┘
        """
        # Normalise the user-supplied dtype through `parse_into_dtype` before
        # passing it on.
        parsed_dtype = parse_into_dtype(dtype)
        return wrap_expr(self._pyexpr.from_buffer(parsed_dtype, endianness))
