"""Important note on ids
----------------------------------------------------------------------------

Multiple id generation schemes are used due to backwards compatibility.
- v1: 1.2.3 <= version < 1.3
      The style used before the rewrite.
      It is not the actual old code, but a replication of the behaviour.
- v2: 1.3 <= version < now
      Standardised mangling scheme from
      https://itanium-cxx-abi.github.io/cxx-abi/abi.html#mangling
      though not completely implemented.
All versions are generated and attached to elements. The newest is used for
the index. All of the versions should work as permalinks.


Signature Nodes and Tagnames
----------------------------------------------------------------------------

Each signature is in a desc_signature node, where all children are
desc_signature_line nodes. Each of these lines will have the attribute
'sphinx_line_type' set to one of the following (prioritized):
- 'declarator', if the line contains the name of the declared object.
- 'templateParams', if the line starts a template parameter list,
- 'templateParams', if the line has template parameters
  Note: such lines might get a new tag in the future.
- 'templateIntroduction, if the line is on the form 'conceptName{...}'
No other desc_signature nodes should exist (so far).


Grammar
----------------------------------------------------------------------------

See https://www.nongnu.org/hcb/ for the grammar,
and https://github.com/cplusplus/draft/blob/master/source/grammar.tex,
and https://github.com/cplusplus/concepts-ts
for the newest grammar.

common grammar things:
    template-declaration ->
        "template" "<" template-parameter-list ">" declaration
    template-parameter-list ->
          template-parameter
        | template-parameter-list "," template-parameter
    template-parameter ->
          type-parameter
        | parameter-declaration # i.e., same as a function argument

    type-parameter ->
          "class"    "..."[opt] identifier[opt]
        | "class"               identifier[opt] "=" type-id
        | "typename" "..."[opt] identifier[opt]
        | "typename"            identifier[opt] "=" type-id
        | "template" "<" template-parameter-list ">"
            "class"  "..."[opt] identifier[opt]
        | "template" "<" template-parameter-list ">"
            "class"             identifier[opt] "=" id-expression
        # also, from C++17 we can have "typename" in template templates
    templateDeclPrefix ->
        "template" "<" template-parameter-list ">"

    simple-declaration ->
        attribute-specifier-seq[opt] decl-specifier-seq[opt]
            init-declarator-list[opt] ;
    # Make the semicolon optional.
    # For now: drop the attributes (TODO).
    # Use at most 1 init-declarator.
    -> decl-specifier-seq init-declarator
    -> decl-specifier-seq declarator initializer

    decl-specifier ->
          storage-class-specifier ->
             (  "static" (only for member_object and function_object)
              | "extern" (only for member_object and function_object)
              | "register"
             )
             thread_local[opt] (only for member_object)
                               (it can also appear before the others)

        | type-specifier -> trailing-type-specifier
        | function-specifier -> "inline" | "virtual" | "explicit" (only
          for function_object)
        | "friend" (only for function_object)
        | "constexpr" (only for member_object and function_object)
    trailing-type-specifier ->
          simple-type-specifier
        | elaborated-type-specifier
        | typename-specifier
        | cv-qualifier -> "const" | "volatile"
    stricter grammar for decl-specifier-seq (with everything, each object
    uses a subset):
        visibility storage-class-specifier function-specifier "friend"
        "constexpr" "volatile" "const" trailing-type-specifier
        # where trailing-type-specifier can no be cv-qualifier
    # Inside e.g., template parameters a strict subset is used
    # (see type-specifier-seq)
    trailing-type-specifier ->
          simple-type-specifier ->
            ::[opt] nested-name-specifier[opt] type-name
          | ::[opt] nested-name-specifier "template" simple-template-id
          | "char" | "bool" | etc.
          | decltype-specifier
        | elaborated-type-specifier ->
            class-key attribute-specifier-seq[opt] ::[opt]
            nested-name-specifier[opt] identifier
          | class-key ::[opt] nested-name-specifier[opt] template[opt]
            simple-template-id
          | "enum" ::[opt] nested-name-specifier[opt] identifier
        | typename-specifier ->
            "typename" ::[opt] nested-name-specifier identifier
          | "typename" ::[opt] nested-name-specifier template[opt]
            simple-template-id
    class-key -> "class" | "struct" | "union"
    type-name ->* identifier | simple-template-id
    # ignoring attributes and decltype, and then some left-factoring
    trailing-type-specifier ->
        rest-of-trailing
        ("class" | "struct" | "union" | "typename") rest-of-trailing
        built-in -> "char" | "bool" | etc.
        decltype-specifier
    rest-of-trailing -> (with some simplification)
        "::"[opt] list-of-elements-separated-by-::
    element ->
        "template"[opt] identifier ("<" template-argument-list ">")[opt]
    template-argument-list ->
          template-argument "..."[opt]
        | template-argument-list "," template-argument "..."[opt]
    template-argument ->
          constant-expression
        | type-specifier-seq abstract-declarator
        | id-expression


    declarator ->
          ptr-declarator
        | noptr-declarator parameters-and-qualifiers trailing-return-type
    ptr-declarator ->
          noptr-declarator
        | ptr-operator ptr-declarator
    noptr-declarator ->
          declarator-id attribute-specifier-seq[opt] ->
                "..."[opt] id-expression
              | rest-of-trailing
        | noptr-declarator parameters-and-qualifiers
        | noptr-declarator "[" constant-expression[opt] "]"
          attribute-specifier-seq[opt]
        | "(" ptr-declarator ")"
    ptr-operator ->
          "*"  attribute-specifier-seq[opt] cv-qualifier-seq[opt]
        | "&   attribute-specifier-seq[opt]
        | "&&" attribute-specifier-seq[opt]
        | "::"[opt] nested-name-specifier "*" attribute-specifier-seq[opt]
            cv-qualifier-seq[opt]
    # function_object must use a parameters-and-qualifiers, the others may
    # use it (e.g., function pointers)
    parameters-and-qualifiers ->
        "(" parameter-clause ")" attribute-specifier-seq[opt]
        cv-qualifier-seq[opt] ref-qualifier[opt]
        exception-specification[opt]
    ref-qualifier -> "&" | "&&"
    exception-specification ->
        "noexcept" ("(" constant-expression ")")[opt]
        "throw" ("(" type-id-list ")")[opt]
    # TODO: we don't implement attributes
    # member functions can have initializers, but we fold them into here
    memberFunctionInit -> "=" "0"
    # (note: only "0" is allowed as the value, according to the standard,
    # right?)

    enum-head ->
        enum-key attribute-specifier-seq[opt] nested-name-specifier[opt]
            identifier enum-base[opt]
    enum-key -> "enum" | "enum struct" | "enum class"
    enum-base ->
        ":" type
    enumerator-definition ->
          identifier
        | identifier "=" constant-expression

We additionally add the possibility for specifying the visibility as the
first thing.

concept_object:
    goal:
        just a declaration of the name (for now)

    grammar: only a single template parameter list, and the nested name
        may not have any template argument lists

        "template" "<" template-parameter-list ">"
        nested-name-specifier

type_object:
    goal:
        either a single type (e.g., "MyClass:Something_T" or a typedef-like
        thing (e.g. "Something Something_T" or "int I_arr[]"
    grammar, single type: based on a type in a function parameter, but
    without a name:
           parameter-declaration
        -> attribute-specifier-seq[opt] decl-specifier-seq
           abstract-declarator[opt]
        # Drop the attributes
        -> decl-specifier-seq abstract-declarator[opt]
    grammar, typedef-like: no initializer
        decl-specifier-seq declarator
    Can start with a templateDeclPrefix.

member_object:
    goal: as a type_object which must have a declarator, and optionally
    with a initializer
    grammar:
        decl-specifier-seq declarator initializer
    Can start with a templateDeclPrefix.

function_object:
    goal: a function declaration, TODO: what about templates? for now: skip
    grammar: no initializer
       decl-specifier-seq declarator
    Can start with a templateDeclPrefix.

class_object:
    goal: a class declaration, but with specification of a base class
    grammar:
          attribute-specifier-seq[opt]
              nested-name "final"[opt] (":" base-specifier-list)[opt]
        base-specifier-list ->
          base-specifier "..."[opt]
        | base-specifier-list, base-specifier "..."[opt]
        base-specifier ->
          base-type-specifier
        | "virtual" access-spe"cifier[opt]    base-type-specifier
        | access-specifier[opt] "virtual"[opt] base-type-specifier
    Can start with a templateDeclPrefix.

enum_object:
    goal: an unscoped enum or a scoped enum, optionally with the underlying
          type specified
    grammar:
        ("class" | "struct")[opt] visibility[opt]
            attribute-specifier-seq[opt] nested-name (":" type)[opt]
enumerator_object:
    goal: an element in a scoped or unscoped enum. The name should be
          injected according to the scopedness.
    grammar:
        nested-name ("=" constant-expression)

namespace_object:
    goal: a directive to put all following declarations in a specific scope
    grammar:
        nested-name
"""

from __future__ import annotations

import re
from typing import TYPE_CHECKING

if TYPE_CHECKING:
    from collections.abc import Sequence, Set

udl_identifier_re = re.compile(
    r'[a-zA-Z_][a-zA-Z0-9_]*\b'  # note, no word boundary in the beginning
)
_string_re = re.compile(
    r"[LuU8]?('([^'\\]*(?:\\.[^'\\]*)*)'"
    r'|"([^"\\]*(?:\\.[^"\\]*)*)")',
    re.DOTALL,
)
_visibility_re = re.compile(r'\b(public|private|protected)\b')
_operator_re = re.compile(
    r"""
        \[\s*\]
    |   \(\s*\)
    |   \+\+ | --
    |   ->\*? | \,
    |   (<<|>>)=? | && | \|\|
    |   <=>
    |   [!<>=/*%+|&^~-]=?
    |   (\b(and|and_eq|bitand|bitor|compl|not|not_eq|or|or_eq|xor|xor_eq)\b)
    """,
    re.VERBOSE,
)
_fold_operator_re = re.compile(
    r"""
        ->\*    |    \.\*    |    \,
    |   (<<|>>)=?    |    &&    |    \|\|
    |   !=
    |   [<>=/*%+|&^~-]=?
    """,
    re.VERBOSE,
)
# see https://en.cppreference.com/w/cpp/keyword
_keywords: Set[str] = frozenset({
    'alignas', 'alignof', 'and', 'and_eq', 'asm', 'auto',
    'bitand', 'bitor', 'bool', 'break',
    'case', 'catch', 'class', 'compl', 'concept', 'continue',
    'char', 'char8_t', 'char16_t', 'char32_t',
    'const', 'const_cast', 'consteval', 'constexpr', 'constinit',
    'decltype', 'default', 'delete', 'do', 'double', 'dynamic_cast',
    'else', 'enum', 'explicit', 'export', 'extern',
    'false', 'float', 'for', 'friend',
    'goto',
    'if', 'inline', 'int',
    'long',
    'mutable',
    'namespace', 'new', 'noexcept', 'not', 'not_eq', 'nullptr',
    'operator', 'or', 'or_eq',
    'private', 'protected', 'public',
    'register', 'reinterpret_cast', 'requires', 'return',
    'short', 'signed', 'sizeof', 'static',
    'static_assert', 'static_cast', 'struct', 'switch',
    'template', 'this', 'thread_local', 'throw',
    'true', 'try', 'typedef', 'typeid', 'typename',
    'union', 'unsigned', 'using',
    'virtual', 'void', 'volatile',
    'wchar_t', 'while',
    'xor', 'xor_eq',
})  # fmt: skip


_simple_type_specifiers_re = re.compile(
    r"""
    \b(
    auto|void|bool
    |signed|unsigned
    |short|long
    |char|wchar_t|char(8|16|32)_t
    |int
    |__int(64|128)  # extension
    |float|double
    |__float80|_Float64x|__float128|_Float128  # extension
    |_Complex|_Imaginary  # extension
    )\b
    """,
    re.VERBOSE,
)

_max_id = 4
_id_prefix: Sequence[str] = ('', '', '_CPPv2', '_CPPv3', '_CPPv4')
# Ids are used in lookup keys which are used across pickled files,
# so when _max_id changes, make sure to update the ENV_VERSION.

# ------------------------------------------------------------------------------
# Id v1 constants
# ------------------------------------------------------------------------------

_id_fundamental_v1 = {
    'char': 'c',
    'signed char': 'c',
    'unsigned char': 'C',
    'int': 'i',
    'signed int': 'i',
    'unsigned int': 'U',
    'long': 'l',
    'signed long': 'l',
    'unsigned long': 'L',
    'bool': 'b',
}
_id_shorthands_v1 = {
    'std::string': 'ss',
    'std::ostream': 'os',
    'std::istream': 'is',
    'std::iostream': 'ios',
    'std::vector': 'v',
    'std::map': 'm',
}
_id_operator_v1 = {
    'new': 'new-operator',
    'new[]': 'new-array-operator',
    'delete': 'delete-operator',
    'delete[]': 'delete-array-operator',
    # the arguments will make the difference between unary and binary
    # '+(unary)' : 'ps',
    # '-(unary)' : 'ng',
    # '&(unary)' : 'ad',
    # '*(unary)' : 'de',
    '~': 'inv-operator',
    '+': 'add-operator',
    '-': 'sub-operator',
    '*': 'mul-operator',
    '/': 'div-operator',
    '%': 'mod-operator',
    '&': 'and-operator',
    '|': 'or-operator',
    '^': 'xor-operator',
    '=': 'assign-operator',
    '+=': 'add-assign-operator',
    '-=': 'sub-assign-operator',
    '*=': 'mul-assign-operator',
    '/=': 'div-assign-operator',
    '%=': 'mod-assign-operator',
    '&=': 'and-assign-operator',
    '|=': 'or-assign-operator',
    '^=': 'xor-assign-operator',
    '<<': 'lshift-operator',
    '>>': 'rshift-operator',
    '<<=': 'lshift-assign-operator',
    '>>=': 'rshift-assign-operator',
    '==': 'eq-operator',
    '!=': 'neq-operator',
    '<': 'lt-operator',
    '>': 'gt-operator',
    '<=': 'lte-operator',
    '>=': 'gte-operator',
    '!': 'not-operator',
    '&&': 'sand-operator',
    '||': 'sor-operator',
    '++': 'inc-operator',
    '--': 'dec-operator',
    ',': 'comma-operator',
    '->*': 'pointer-by-pointer-operator',
    '->': 'pointer-operator',
    '()': 'call-operator',
    '[]': 'subscript-operator',
}

# ------------------------------------------------------------------------------
# Id v > 1 constants
# ------------------------------------------------------------------------------

_id_fundamental_v2 = {
    # not all of these are actually parsed as fundamental types, TODO: do that
    'void': 'v',
    'bool': 'b',
    'char': 'c',
    'signed char': 'a',
    'unsigned char': 'h',
    'wchar_t': 'w',
    'char32_t': 'Di',
    'char16_t': 'Ds',
    'char8_t': 'Du',
    'short': 's',
    'short int': 's',
    'signed short': 's',
    'signed short int': 's',
    'unsigned short': 't',
    'unsigned short int': 't',
    'int': 'i',
    'signed': 'i',
    'signed int': 'i',
    'unsigned': 'j',
    'unsigned int': 'j',
    'long': 'l',
    'long int': 'l',
    'signed long': 'l',
    'signed long int': 'l',
    'unsigned long': 'm',
    'unsigned long int': 'm',
    'long long': 'x',
    'long long int': 'x',
    'signed long long': 'x',
    'signed long long int': 'x',
    '__int64': 'x',
    'unsigned long long': 'y',
    'unsigned long long int': 'y',
    '__int128': 'n',
    'signed __int128': 'n',
    'unsigned __int128': 'o',
    'float': 'f',
    'double': 'd',
    'long double': 'e',
    '__float80': 'e',
    '_Float64x': 'e',
    '__float128': 'g',
    '_Float128': 'g',
    '_Complex float': 'Cf',
    '_Complex double': 'Cd',
    '_Complex long double': 'Ce',
    '_Imaginary float': 'f',
    '_Imaginary double': 'd',
    '_Imaginary long double': 'e',
    'auto': 'Da',
    'decltype(auto)': 'Dc',
    'std::nullptr_t': 'Dn',
}
_id_operator_v2 = {
    'new': 'nw',
    'new[]': 'na',
    'delete': 'dl',
    'delete[]': 'da',
    # the arguments will make the difference between unary and binary
    # in operator definitions
    # '+(unary)' : 'ps',
    # '-(unary)' : 'ng',
    # '&(unary)' : 'ad',
    # '*(unary)' : 'de',
    '~': 'co',
    'compl': 'co',
    '+': 'pl',
    '-': 'mi',
    '*': 'ml',
    '/': 'dv',
    '%': 'rm',
    '&': 'an',
    'bitand': 'an',
    '|': 'or',
    'bitor': 'or',
    '^': 'eo',
    'xor': 'eo',
    '=': 'aS',
    '+=': 'pL',
    '-=': 'mI',
    '*=': 'mL',
    '/=': 'dV',
    '%=': 'rM',
    '&=': 'aN',
    'and_eq': 'aN',
    '|=': 'oR',
    'or_eq': 'oR',
    '^=': 'eO',
    'xor_eq': 'eO',
    '<<': 'ls',
    '>>': 'rs',
    '<<=': 'lS',
    '>>=': 'rS',
    '==': 'eq',
    '!=': 'ne',
    'not_eq': 'ne',
    '<': 'lt',
    '>': 'gt',
    '<=': 'le',
    '>=': 'ge',
    '<=>': 'ss',
    '!': 'nt',
    'not': 'nt',
    '&&': 'aa',
    'and': 'aa',
    '||': 'oo',
    'or': 'oo',
    '++': 'pp',
    '--': 'mm',
    ',': 'cm',
    '->*': 'pm',
    '->': 'pt',
    '()': 'cl',
    '[]': 'ix',
    '.*': 'ds',  # this one is not overloadable, but we need it for expressions
    '?': 'qu',
}
_id_operator_unary_v2 = {
    '++': 'pp_',
    '--': 'mm_',
    '*': 'de',
    '&': 'ad',
    '+': 'ps',
    '-': 'ng',
    '!': 'nt',
    'not': 'nt',
    '~': 'co',
    'compl': 'co',
}
_id_char_from_prefix: dict[str | None, str] = {
    None: 'c',
    'u8': 'c',
    'u': 'Ds',
    'U': 'Di',
    'L': 'w',
}
# these are ordered by preceedence
_expression_bin_ops: Sequence[tuple[str, ...]] = [
    ('||', 'or'),
    ('&&', 'and'),
    ('|', 'bitor'),
    ('^', 'xor'),
    ('&', 'bitand'),
    ('==', '!=', 'not_eq'),
    ('<=>', '<=', '>=', '<', '>'),
    ('<<', '>>'),
    ('+', '-'),
    ('*', '/', '%'),
    ('.*', '->*'),
]
_expression_unary_ops: Sequence[str] = [
    '++',
    '--',
    '*',
    '&',
    '+',
    '-',
    '!',
    'not',
    '~',
    'compl',
]
_expression_assignment_ops: Sequence[str] = [
    '=',
    '*=',
    '/=',
    '%=',
    '+=',
    '-=',
    '>>=',
    '<<=',
    '&=',
    'and_eq',
    '^=',
    '|=',
    'xor_eq',
    'or_eq',
]
_id_explicit_cast = {
    'dynamic_cast': 'dc',
    'static_cast': 'sc',
    'const_cast': 'cc',
    'reinterpret_cast': 'rc',
}
