"""Tokenizers for three string formatting methods"""

from __future__ import annotations

from enum import Enum, unique
from typing import Final

from mypy.checkstrformat import (
    ConversionSpecifier,
    parse_conversion_specifiers,
    parse_format_value,
)
from mypy.errors import Errors
from mypy.messages import MessageBuilder
from mypy.nodes import Context, Expression
from mypy.options import Options
from mypyc.ir.ops import Integer, Value
from mypyc.ir.rtypes import (
    c_pyssize_t_rprimitive,
    is_bytes_rprimitive,
    is_int_rprimitive,
    is_short_int_rprimitive,
    is_str_rprimitive,
)
from mypyc.irbuild.builder import IRBuilder
from mypyc.primitives.bytes_ops import bytes_build_op
from mypyc.primitives.int_ops import int_to_str_op
from mypyc.primitives.str_ops import str_build_op, str_op


@unique
class FormatOp(Enum):
    """FormatOp represents conversion operations of string formatting during
    compile time.

    Compare to ConversionSpecifier, FormatOp has fewer attributes.
    For example, to mark a conversion from any object to string,
    ConversionSpecifier may have several representations, like '%s', '{}'
    or '{:{}}'. However, there would only exist one corresponding FormatOp.
    """

    STR = "s"
    INT = "d"
    BYTES = "b"


def generate_format_ops(specifiers: list[ConversionSpecifier]) -> list[FormatOp] | None:
    """Convert ConversionSpecifier to FormatOp.

    Different ConversionSpecifiers may share a same FormatOp.
    """
    format_ops = []
    for spec in specifiers:
        # TODO: Match specifiers instead of using whole_seq
        if spec.whole_seq == "%s" or spec.whole_seq == "{:{}}":
            format_op = FormatOp.STR
        elif spec.whole_seq == "%d":
            format_op = FormatOp.INT
        elif spec.whole_seq == "%b":
            format_op = FormatOp.BYTES
        elif spec.whole_seq:
            return None
        else:
            format_op = FormatOp.STR
        format_ops.append(format_op)
    return format_ops


def tokenizer_printf_style(format_str: str) -> tuple[list[str], list[FormatOp]] | None:
    """Tokenize a printf-style format string using regex.

    Return:
        A list of string literals and a list of FormatOps.
    """
    literals: list[str] = []
    specifiers: list[ConversionSpecifier] = parse_conversion_specifiers(format_str)
    format_ops = generate_format_ops(specifiers)
    if format_ops is None:
        return None

    last_end = 0
    for spec in specifiers:
        cur_start = spec.start_pos
        literals.append(format_str[last_end:cur_start])
        last_end = cur_start + len(spec.whole_seq)
    literals.append(format_str[last_end:])

    return literals, format_ops


# The empty Context as an argument for parse_format_value().
# It wouldn't be used since the code has passed the type-checking.
EMPTY_CONTEXT: Final = Context()


def tokenizer_format_call(format_str: str) -> tuple[list[str], list[FormatOp]] | None:
    """Tokenize a str.format() format string.

    The core function parse_format_value() is shared with mypy.
    With these specifiers, we then parse the literal substrings
    of the original format string and convert `ConversionSpecifier`
    to `FormatOp`.

    Return:
        A list of string literals and a list of FormatOps. The literals
        are interleaved with FormatOps and the length of returned literals
        should be exactly one more than FormatOps.
        Return None if it cannot parse the string.
    """
    # Creates an empty MessageBuilder here.
    # It wouldn't be used since the code has passed the type-checking.
    specifiers = parse_format_value(
        format_str, EMPTY_CONTEXT, MessageBuilder(Errors(Options()), {})
    )
    if specifiers is None:
        return None
    format_ops = generate_format_ops(specifiers)
    if format_ops is None:
        return None

    literals: list[str] = []
    last_end = 0
    for spec in specifiers:
        # Skip { and }
        literals.append(format_str[last_end : spec.start_pos - 1])
        last_end = spec.start_pos + len(spec.whole_seq) + 1
    literals.append(format_str[last_end:])
    # Deal with escaped {{
    literals = [x.replace("{{", "{").replace("}}", "}") for x in literals]

    return literals, format_ops


def convert_format_expr_to_str(
    builder: IRBuilder, format_ops: list[FormatOp], exprs: list[Expression], line: int
) -> list[Value] | None:
    """Convert expressions into string literal objects with the guidance
    of FormatOps. Return None when fails."""
    if len(format_ops) != len(exprs):
        return None

    converted = []
    for x, format_op in zip(exprs, format_ops):
        node_type = builder.node_type(x)
        if format_op == FormatOp.STR:
            if is_str_rprimitive(node_type):
                var_str = builder.accept(x)
            elif is_int_rprimitive(node_type) or is_short_int_rprimitive(node_type):
                var_str = builder.primitive_op(int_to_str_op, [builder.accept(x)], line)
            else:
                var_str = builder.primitive_op(str_op, [builder.accept(x)], line)
        elif format_op == FormatOp.INT:
            if is_int_rprimitive(node_type) or is_short_int_rprimitive(node_type):
                var_str = builder.primitive_op(int_to_str_op, [builder.accept(x)], line)
            else:
                return None
        else:
            return None
        converted.append(var_str)
    return converted


def join_formatted_strings(
    builder: IRBuilder, literals: list[str] | None, substitutions: list[Value], line: int
) -> Value:
    """Merge the list of literals and the list of substitutions
    alternatively using 'str_build_op'.

    `substitutions` is the result value of formatting conversions.

    If the `literals` is set to None, we simply join the substitutions;
    Otherwise, the `literals` is the literal substrings of the original
    format string and its length should be exactly one more than
    substitutions.

    For example:
    (1)    'This is a %s and the value is %d'
        -> literals: ['This is a ', ' and the value is', '']
    (2)    '{} and the value is {}'
        -> literals: ['', ' and the value is', '']
    """
    # The first parameter for str_build_op is the total size of
    # the following PyObject*
    result_list: list[Value] = [Integer(0, c_pyssize_t_rprimitive)]

    if literals is not None:
        for a, b in zip(literals, substitutions):
            if a:
                result_list.append(builder.load_str(a))
            result_list.append(b)
        if literals[-1]:
            result_list.append(builder.load_str(literals[-1]))
    else:
        result_list.extend(substitutions)

    # Special case for empty string and literal string
    if len(result_list) == 1:
        return builder.load_str("")
    if not substitutions and len(result_list) == 2:
        return result_list[1]

    result_list[0] = Integer(len(result_list) - 1, c_pyssize_t_rprimitive)
    return builder.call_c(str_build_op, result_list, line)


def convert_format_expr_to_bytes(
    builder: IRBuilder, format_ops: list[FormatOp], exprs: list[Expression], line: int
) -> list[Value] | None:
    """Convert expressions into bytes literal objects with the guidance
    of FormatOps. Return None when fails."""
    if len(format_ops) != len(exprs):
        return None

    converted = []
    for x, format_op in zip(exprs, format_ops):
        node_type = builder.node_type(x)
        # conversion type 's' is an alias of 'b' in bytes formatting
        if format_op == FormatOp.BYTES or format_op == FormatOp.STR:
            if is_bytes_rprimitive(node_type):
                var_bytes = builder.accept(x)
            else:
                return None
        else:
            return None
        converted.append(var_bytes)
    return converted


def join_formatted_bytes(
    builder: IRBuilder, literals: list[str], substitutions: list[Value], line: int
) -> Value:
    """Merge the list of literals and the list of substitutions
    alternatively using 'bytes_build_op'."""
    result_list: list[Value] = [Integer(0, c_pyssize_t_rprimitive)]

    for a, b in zip(literals, substitutions):
        if a:
            result_list.append(builder.load_bytes_from_str_literal(a))
        result_list.append(b)
    if literals[-1]:
        result_list.append(builder.load_bytes_from_str_literal(literals[-1]))

    # Special case for empty bytes and literal
    if len(result_list) == 1:
        return builder.load_bytes_from_str_literal("")
    if not substitutions and len(result_list) == 2:
        return result_list[1]

    result_list[0] = Integer(len(result_list) - 1, c_pyssize_t_rprimitive)
    return builder.call_c(bytes_build_op, result_list, line)
