# Copyright (c) Jupyter Development Team.
# Distributed under the terms of the Modified BSD License.

"""
Localization utilities to find available language packs and packages with
localization data.
"""

from __future__ import annotations

import copy
import gettext
import importlib
import json
import locale
import os
import re
import sys
import traceback
from functools import lru_cache
from typing import Any, Pattern

import babel
from packaging.version import parse as parse_version

# See compatibility note on `group` keyword in https://docs.python.org/3/library/importlib.metadata.html#entry-points
if sys.version_info < (3, 10):  # pragma: no cover
    from importlib_metadata import entry_points
else:  # pragma: no cover
    from importlib.metadata import entry_points

# Entry points
# Entry point groups queried to discover installed language packs and
# installed packages that bundle their own locale data.
JUPYTERLAB_LANGUAGEPACK_ENTRY = "jupyterlab.languagepack"
JUPYTERLAB_LOCALE_ENTRY = "jupyterlab.locale"

# Constants
DEFAULT_LOCALE = "en"
# Language code reported by `locale.getlocale()` at import time; falls back
# to DEFAULT_LOCALE when no language code is reported.
SYS_LOCALE = locale.getlocale()[0] or DEFAULT_LOCALE
# Package locale data is looked up under
# `<package root>/LOCALE_DIR/<locale>/LC_MESSAGES_DIR/<package name>.json`.
LOCALE_DIR = "locale"
LC_MESSAGES_DIR = "LC_MESSAGES"
# Translation domain used when a schema does not declare its own.
DEFAULT_DOMAIN = "jupyterlab"
L10N_SCHEMA_NAME = "@jupyterlab/translation-extension:plugin"
# When true, the context-aware translation methods fall back to their
# context-free counterparts.
PY37_OR_LOWER = sys.version_info[:2] <= (3, 7)

# Pseudo language locale for in-context translation
PSEUDO_LANGUAGE = "ach_UG"

# Translation contexts applied to schema strings.
_default_schema_context = "schema"
_default_settings_context = "settings"
# Schema key holding the optional "selectors" and "domain" i18n settings.
_lab_i18n_config = "jupyter.lab.internationalization"

# mapping of schema translatable string selectors to translation context
# Each key is a regular expression fragment; it is compiled with a leading
# "^/" and a trailing "$" and matched against the slash-joined path of a
# schema entry. Selectors are tried in insertion order and the first match
# decides the context, so the order of the entries is significant.
DEFAULT_SCHEMA_SELECTORS = {
    "properties/.*/title": _default_settings_context,
    "properties/.*/description": _default_settings_context,
    "definitions/.*/properties/.*/title": _default_settings_context,
    "definitions/.*/properties/.*/description": _default_settings_context,
    "title": _default_schema_context,
    "description": _default_schema_context,
    # JupyterLab-specific
    r"jupyter\.lab\.setting-icon-label": _default_settings_context,
    r"jupyter\.lab\.menus/.*/label": "menu",
    r"jupyter\.lab\.toolbars/.*/label": "toolbar",
}


@lru_cache
def _get_default_schema_selectors() -> dict[Pattern, str]:
    """
    Compile the default schema selectors into anchored regular expressions.

    Returns
    -------
    dict
        Mapping of compiled pattern to translation context, in the order of
        `DEFAULT_SCHEMA_SELECTORS`. The result is cached, so every call
        returns the same dictionary object.
    """
    compiled = {}
    for selector, context in DEFAULT_SCHEMA_SELECTORS.items():
        # Anchor the selector so that it has to match a whole schema path
        compiled[re.compile("^/" + selector + "$")] = context
    return compiled


def _prepare_schema_patterns(schema: dict) -> dict[Pattern, str]:
    """
    Build the selector patterns that apply to a given schema.

    Parameters
    ----------
    schema: dict
        The schema; extra selectors are read from the "selectors" entry of
        its internationalization configuration, when present.

    Returns
    -------
    dict
        A new mapping of compiled pattern to translation context: the default
        selectors first, followed by the schema-specific selectors, which
        always use the default schema context.
    """
    # Copy so that the cached default mapping is never modified
    patterns = dict(_get_default_schema_selectors())
    i18n_config = schema.get(_lab_i18n_config, {})
    for selector in i18n_config.get("selectors", []):
        patterns[re.compile("^/" + selector + "$")] = _default_schema_context
    return patterns


# --- Private process helpers
# ----------------------------------------------------------------------------
def _get_installed_language_pack_locales() -> tuple[dict[str, Any], str]:
    """
    Discover the installed language packs through their entry points.

    Returns
    -------
    tuple
        A tuple `(locations, message)`. `locations` maps each entry point
        name to the directory containing the module it loads. `message`
        holds the newline-joined tracebacks of the entry points that failed
        to load, and is empty when all of them loaded.
    """
    locations = {}
    errors = []
    for language_pack in entry_points(group=JUPYTERLAB_LANGUAGEPACK_ENTRY):
        try:
            module = language_pack.load()
            locations[language_pack.name] = os.path.dirname(module.__file__)
        except Exception:  # pragma: no cover
            # Keep going: one broken entry point must not hide the others
            errors.append(traceback.format_exc())

    return locations, "\n".join(errors)


def _get_installed_package_locales() -> tuple[dict[str, Any], str]:
    """
    Discover the installed packages that declare locale data.

    Returns
    -------
    tuple
        A tuple `(roots, message)`. `roots` maps each entry point name to the
        root location of the package, i.e. the directory containing the
        module the entry point loads. `message` holds the newline-joined
        tracebacks of the entry points that failed to load, and is empty
        when all of them loaded.
    """
    roots = {}
    failures = []
    for package in entry_points(group=JUPYTERLAB_LOCALE_ENTRY):
        try:
            loaded = package.load()
            roots[package.name] = os.path.dirname(loaded.__file__)
        except Exception:
            # Record the failure and continue with the remaining packages
            failures.append(traceback.format_exc())

    return roots, "\n".join(failures)


# --- Helpers
# ----------------------------------------------------------------------------
def is_valid_locale(locale_: str) -> bool:
    """
    Tell whether a `locale_` value is a valid locale.

    Parameters
    ----------
    locale_: str
        Language locale code.

    Returns
    -------
    bool
        True when the locale is explicitly allowed or can be parsed by
        babel, False otherwise.

    Notes
    -----
    A valid locale is in the form language (See ISO-639 standard) and an
    optional territory (See ISO-3166 standard).

    Examples of valid locales:
    - English: "en"
    - Australian English: "en_AU"
    - Portuguese: "pt"
    - Brazilian Portuguese: "pt_BR"

    Examples of invalid locales:
    - Australian Spanish: "es_AU"
    - Brazilian German: "de_BR"
    """
    # Norwegian is accepted explicitly, without asking babel
    if locale_ in {"no_NO"}:
        return True

    try:
        babel.Locale.parse(locale_)
    except (babel.core.UnknownLocaleError, ValueError):
        # Expected outcome for an unknown or malformed locale
        return False
    return True


def get_display_name(locale_: str, display_locale: str = DEFAULT_LOCALE) -> str:
    """
    Return the name of the `locale_` language, written in `display_locale`.

    Parameters
    ----------
    locale_: str
        The locale whose language name is requested. An invalid value is
        replaced by the default locale.
    display_locale: str, optional
        The locale in which the name is written. An invalid value is
        replaced by the default locale.

    Returns
    -------
    str
        The language name with its first character upper-cased. When babel
        does not know the locale, `display_locale` is used as the name.
    """
    if not is_valid_locale(locale_):
        locale_ = DEFAULT_LOCALE
    if not is_valid_locale(display_locale):
        display_locale = DEFAULT_LOCALE

    try:
        name = babel.Locale.parse(locale_).get_display_name(display_locale)
    except babel.UnknownLocaleError:
        name = display_locale

    if not name:
        # Nothing to capitalize; hand the empty value back untouched
        return name  # type:ignore[return-value]
    return name[0].upper() + name[1:]


def merge_locale_data(
    language_pack_locale_data: dict[str, Any], package_locale_data: dict[str, Any]
) -> dict[str, Any]:
    """
    Combine language pack data with the locale data bundled in a package.

    The metadata of each input is read from its "" key. The package data
    takes precedence only when both inputs carry a version, their domains
    are equal, and the package version is strictly newer than the language
    pack version.

    Parameters
    ----------
    language_pack_locale_data: dict
        The dictionary with language pack locale data.
    package_locale_data: dict
        The dictionary with package locale data.

    Returns
    -------
    dict
        The language pack data itself when no merge takes place; otherwise a
        shallow copy of it updated with the package data.
    """
    lp_metadata = language_pack_locale_data.get("", {})
    lp_version = lp_metadata.get("version", None)
    lp_domain = lp_metadata.get("domain", None)

    pkg_metadata = package_locale_data.get("", {})
    pkg_version = pkg_metadata.get("version", None)
    # NOTE(review): the fallback here is the string "None", not None, so two
    # inputs that both lack a domain are never merged - confirm this is intended.
    pkg_domain = pkg_metadata.get("domain", "None")

    comparable = bool(lp_version and pkg_version) and pkg_domain == lp_domain
    if not comparable:
        return language_pack_locale_data

    newest = parse_version(pkg_version)
    current = parse_version(lp_version)
    if newest > current:
        # The package ships more recent strings: let them override the pack
        merged = language_pack_locale_data.copy()
        merged.update(package_locale_data)
        return merged

    return language_pack_locale_data


def get_installed_packages_locale(locale_: str) -> tuple[dict, str]:
    """
    Get the locale data of all installed jupyterlab extensions that bundle it.

    Parameters
    ----------
    locale_: str
        The locale to look up. Locale directories are matched
        case-insensitively.

    Returns
    -------
    tuple
        A tuple in the form `(locale_data_dict, message)`, where
        `locale_data_dict` is a dictionary of the packages providing data
        for the locale:
            >>> {"package-name": locale_data, ...}
        and `message` holds the newline-joined error tracebacks, or is
        empty when nothing failed. When package discovery itself reports an
        error, no package is inspected and the discovery message is returned
        with an empty dictionary.

    Examples
    --------
    - `entry_points={"jupyterlab.locale": "package-name = package_module"}`
    - `entry_points={"jupyterlab.locale": "jupyterlab-git = jupyterlab_git"}`
    """
    found_package_locales, message = _get_installed_package_locales()
    packages_locale_data: dict = {}
    if message:
        return packages_locale_data, message

    # Start from an empty list: splitting the empty discovery message would
    # yield [""] and prefix every reported error with a blank line.
    messages: list[str] = []
    for package_name, package_root_path in found_package_locales.items():
        try:
            locale_path = os.path.join(package_root_path, LOCALE_DIR)
            # Handle letter casing
            locales = {
                loc.lower(): loc
                for loc in os.listdir(locale_path)
                if os.path.isdir(os.path.join(locale_path, loc))
            }
        except Exception:
            messages.append(traceback.format_exc())
            continue

        wanted = locale_.lower()
        if wanted not in locales:
            continue

        locale_json_path = os.path.join(
            locale_path,
            locales[wanted],
            LC_MESSAGES_DIR,
            f"{package_name}.json",
        )
        if not os.path.isfile(locale_json_path):
            continue

        try:
            with open(locale_json_path, encoding="utf-8") as fh:
                packages_locale_data[package_name] = json.load(fh)
        except Exception:
            messages.append(traceback.format_exc())

    return packages_locale_data, "\n".join(messages)


# --- API
# ----------------------------------------------------------------------------
def get_language_packs(display_locale: str = DEFAULT_LOCALE) -> tuple[dict, str]:
    """
    List the language packs installed in the system.

    Each entry carries the language name written in the display locale
    ("displayName") and in the language itself ("nativeName"). The default
    locale is always listed.

    Parameters
    ----------
    display_locale: str, optional
        Locale used for the display names. Default is DEFAULT_LOCALE, which
        is also used when the value is not an installed valid locale.

    Returns
    -------
    tuple
        A tuple in the form `(locale_data_dict, message)`. When language
        pack discovery reports an error, the dictionary is empty and the
        message is the discovery error.
    """
    found_locales, message = _get_installed_language_pack_locales()
    if message:
        # Discovery failed: report it and list nothing
        return {}, message

    valid_locales = []
    invalid_locales = []
    for candidate in found_locales:
        bucket = valid_locales if is_valid_locale(candidate) else invalid_locales
        bucket.append(candidate)

    if display_locale in valid_locales:
        display_locale_ = display_locale
    else:
        display_locale_ = DEFAULT_LOCALE

    if display_locale == PSEUDO_LANGUAGE:
        default_display_name = "Default"
    else:
        default_display_name = get_display_name(DEFAULT_LOCALE, display_locale_)

    locales = {
        DEFAULT_LOCALE: {
            "displayName": default_display_name,
            "nativeName": get_display_name(DEFAULT_LOCALE, DEFAULT_LOCALE),
        }
    }
    for valid in valid_locales:
        locales[valid] = {
            "displayName": get_display_name(valid, display_locale_),
            "nativeName": get_display_name(valid, valid),
        }

    messages = []
    if PSEUDO_LANGUAGE in invalid_locales:
        # The pseudo-language is listed on purpose, so it is not reported as invalid
        invalid_locales.remove(PSEUDO_LANGUAGE)
        if display_locale == PSEUDO_LANGUAGE:
            pseudo_native_name = "Pseudo-language"
        else:
            # Trick to ensure the proper language is selected in the language menu
            pseudo_native_name = "to translate the UI"
        locales[PSEUDO_LANGUAGE] = {
            "displayName": "Pseudo-language",
            "nativeName": pseudo_native_name,
        }

    # Whatever is left is genuinely invalid
    if invalid_locales:
        messages.append(f"The following locales are invalid: {invalid_locales}!")

    return locales, "\n".join(messages)


def get_language_pack(locale_: str) -> tuple:
    """
    Get a language pack for a given `locale_` and update with any installed
    package locales.

    Parameters
    ----------
    locale_: str
        The locale of the requested language pack.

    Returns
    -------
    tuple
        A tuple in the form `(locale_data_dict, message)`. `locale_data_dict`
        maps package names to their locale data; `message` holds the
        newline-joined errors met while discovering or reading the data and
        is empty when nothing failed.

    Notes
    -----
    The language pack is only read when the package locale lookup reported
    no error, the locale is valid (or is the pseudo-language) and a language
    pack is installed for it; otherwise the dictionary is empty.

    A language pack file that cannot be read is reported in the message and
    skipped; locale data bundled in the matching package is then used as is.
    """
    found_locales, language_pack_message = _get_installed_language_pack_locales()
    found_packages_locales, message = get_installed_packages_locale(locale_)
    locale_data = {}
    # Report the errors of both lookups: previously the language pack
    # discovery errors were overwritten and silently lost.
    messages = [text for text in (language_pack_message, message) if text]
    if (
        not message
        and (locale_ == PSEUDO_LANGUAGE or is_valid_locale(locale_))
        and locale_ in found_locales
    ):
        suffix = ".json"
        path = found_locales[locale_]
        for root, _dirs, files in os.walk(path, topdown=False):
            for name in files:
                if not name.endswith(suffix):
                    continue

                # Strip only the extension, not every ".json" in the name
                pkg_name = name[: -len(suffix)]
                json_path = os.path.join(root, name)
                try:
                    with open(json_path, encoding="utf-8") as fh:
                        merged_data = json.load(fh)
                except Exception:
                    messages.append(traceback.format_exc())
                    # Nothing was loaded for this file: skip it rather than
                    # store missing or stale data under this package name
                    continue

                # Load packages with locale data and merge them
                if pkg_name in found_packages_locales:
                    pkg_data = found_packages_locales[pkg_name]
                    merged_data = merge_locale_data(merged_data, pkg_data)

                locale_data[pkg_name] = merged_data

        # Add package locales that do not exist in the language pack
        for pkg_name, data in found_packages_locales.items():
            if pkg_name not in locale_data:
                locale_data[pkg_name] = data

    return locale_data, "\n".join(messages)


# --- Translators
# ----------------------------------------------------------------------------
class TranslationBundle:
    """
    Gettext-based translations for one domain and one locale.
    """

    def __init__(self, domain: str, locale_: str):
        """
        Create the bundle and load the translations for `locale_`.

        Parameters
        ----------
        domain: str
            The translation domain.
        locale_: str
            The language name to use.
        """
        self._domain = domain
        self._locale = locale_
        self._translator = gettext.NullTranslations()

        self.update_locale(locale_)

    def update_locale(self, locale_: str) -> None:
        """
        Switch the bundle to another locale.

        For any locale other than the default one, the translations are
        looked up in the `jupyterlab_language_pack_<locale_>` module. When
        that module cannot be located, gettext is asked to search its
        default location, and untranslated strings are returned when no
        catalog is found.

        Parameters
        ----------
        locale_: str
            The language name to use.
        """
        # TODO: Need to handle packages that provide their own .mo files
        self._locale = locale_
        localedir = None
        if locale_ != DEFAULT_LOCALE:
            pack_name = f"jupyterlab_language_pack_{locale_}"
            try:
                pack = importlib.import_module(pack_name)
                assert pack.__file__ is not None
                pack_root = os.path.dirname(pack.__file__)
                localedir = os.path.join(pack_root, LOCALE_DIR)
            except Exception:  # noqa: S110
                # Best effort: without the language pack, keep localedir unset
                pass

        self._translator = gettext.translation(
            self._domain,
            localedir=localedir,
            languages=(self._locale,),
            fallback=True,
        )

    def gettext(self, msgid: str) -> str:
        """
        Translate a message.

        Parameters
        ----------
        msgid: str
            The singular string to translate.

        Returns
        -------
        str
            The translated string.
        """
        translated = self._translator.gettext(msgid)
        return translated

    def ngettext(self, msgid: str, msgid_plural: str, n: int) -> str:
        """
        Translate a message, choosing the form that fits the count `n`.

        Parameters
        ----------
        msgid: str
            The singular string to translate.
        msgid_plural: str
            The plural string to translate.
        n: int
            The number for pluralization.

        Returns
        -------
        str
            The translated string.
        """
        translated = self._translator.ngettext(msgid, msgid_plural, n)
        return translated

    def pgettext(self, msgctxt: str, msgid: str) -> str:
        """
        Translate a message within a context.

        Parameters
        ----------
        msgctxt: str
            The message context.
        msgid: str
            The singular string to translate.

        Returns
        -------
        str
            The translated string.
        """
        if not PY37_OR_LOWER:
            return self._translator.pgettext(msgctxt, msgid)

        # Python 3.7 or lower has no context-based translations, so the
        # context is dropped and plain `gettext` is used instead.
        return self._translator.gettext(msgid)

    def npgettext(self, msgctxt: str, msgid: str, msgid_plural: str, n: int) -> str:
        """
        Translate a message within a context, choosing the form that fits `n`.

        Parameters
        ----------
        msgctxt: str
            The message context.
        msgid: str
            The singular string to translate.
        msgid_plural: str
            The plural string to translate.
        n: int
            The number for pluralization.

        Returns
        -------
        str
            The translated string.
        """
        if not PY37_OR_LOWER:
            return self._translator.npgettext(msgctxt, msgid, msgid_plural, n)

        # Python 3.7 or lower has no context-based translations, so the
        # context is dropped and plain `ngettext` is used instead.
        return self._translator.ngettext(msgid, msgid_plural, n)

    # Shorthands
    def __(self, msgid: str) -> str:
        """
        Short alias of `gettext`.

        Parameters
        ----------
        msgid: str
            The singular string to translate.

        Returns
        -------
        str
            The translated string.
        """
        return self.gettext(msgid)

    def _n(self, msgid: str, msgid_plural: str, n: int) -> str:
        """
        Short alias of `ngettext`.

        Parameters
        ----------
        msgid: str
            The singular string to translate.
        msgid_plural: str
            The plural string to translate.
        n: int
            The number for pluralization.

        Returns
        -------
        str
            The translated string.
        """
        return self.ngettext(msgid, msgid_plural, n)

    def _p(self, msgctxt: str, msgid: str) -> str:
        """
        Short alias of `pgettext`.

        Parameters
        ----------
        msgctxt: str
            The message context.
        msgid: str
            The singular string to translate.

        Returns
        -------
        str
            The translated string.
        """
        return self.pgettext(msgctxt, msgid)

    def _np(self, msgctxt: str, msgid: str, msgid_plural: str, n: int) -> str:
        """
        Short alias of `npgettext`.

        Parameters
        ----------
        msgctxt: str
            The message context.
        msgid: str
            The singular string to translate.
        msgid_plural: str
            The plural string to translate.
        n: int
            The number for pluralization.

        Returns
        -------
        str
            The translated string.
        """
        return self.npgettext(msgctxt, msgid, msgid_plural, n)


class translator:
    """
    Translations manager.

    Keeps one `TranslationBundle` per normalized domain together with the
    locale shared by all of them. The state lives on the class and every
    method is a static or class method, so no instance is needed.
    """

    # Bundles already created by `load`, keyed by normalized domain
    _TRANSLATORS: dict[str, TranslationBundle] = {}
    # Locale applied to the bundles; starts as the system locale
    _LOCALE = SYS_LOCALE

    @staticmethod
    def normalize_domain(domain: str) -> str:
        """Normalize a domain name.

        Parameters
        ----------
        domain: str
            Domain to normalize

        Returns
        -------
        str
            Normalized domain, with every "-" replaced by "_"
        """
        return domain.replace("-", "_")

    @classmethod
    def set_locale(cls, locale_: str) -> None:
        """
        Set locale for the translation bundles based on the settings.

        An invalid locale is ignored and the current locale is kept.

        Parameters
        ----------
        locale_: str
            The language name to use.
        """
        if locale_ == cls._LOCALE:
            # Nothing to do bail early
            return

        if is_valid_locale(locale_):
            cls._LOCALE = locale_
            # Bring every bundle loaded so far to the new locale
            for bundle in cls._TRANSLATORS.values():
                bundle.update_locale(locale_)

    @classmethod
    def load(cls, domain: str) -> TranslationBundle:
        """
        Load translation domain.

        The domain is usually the normalized ``package_name``.

        Parameters
        ----------
        domain: str
            The translations domain. The normalized python package name.

        Returns
        -------
        TranslationBundle
            The bundle bound to the domain. It is created with the current
            locale on first use and reused on later calls.
        """
        norm_domain = translator.normalize_domain(domain)
        if norm_domain in cls._TRANSLATORS:
            trans = cls._TRANSLATORS[norm_domain]
        else:
            trans = TranslationBundle(norm_domain, cls._LOCALE)
            cls._TRANSLATORS[norm_domain] = trans

        return trans

    @staticmethod
    def _translate_schema_strings(
        translations: Any,
        schema: dict,
        prefix: str = "",
        to_translate: dict[Pattern, str] | None = None,
    ) -> None:
        """Translate a schema in-place.

        Parameters
        ----------
        translations: Any
            Object whose `pgettext(context, value)` translates a string.
        schema: dict
            The schema, or a nested part of it, to modify.
        prefix: str, optional
            Path of `schema` inside the top-level schema.
        to_translate: dict, optional
            Compiled selector patterns mapped to their translation context.
            Computed from `schema` when not given.
        """
        if to_translate is None:
            to_translate = _prepare_schema_patterns(schema)

        for key, value in schema.items():
            path = prefix + "/" + key

            if isinstance(value, str):
                # The first selector matching the path decides the context
                for pattern, context in to_translate.items():
                    if pattern.fullmatch(path):
                        schema[key] = translations.pgettext(context, value)
                        break
            elif isinstance(value, dict):
                translator._translate_schema_strings(
                    translations,
                    value,
                    prefix=path,
                    to_translate=to_translate,
                )
            elif isinstance(value, list):
                for i, element in enumerate(value):
                    # Only dictionaries nested in lists are visited
                    if not isinstance(element, dict):
                        continue
                    translator._translate_schema_strings(
                        translations,
                        element,
                        prefix=path + "[" + str(i) + "]",
                        to_translate=to_translate,
                    )

    @staticmethod
    def translate_schema(schema: dict) -> dict:
        """Translate a schema.

        Parameters
        ----------
        schema: dict
            The schema to be translated

        Returns
        -------
        Dict
            The translated schema. With the default locale the input schema
            itself is returned; otherwise a translated deep copy is returned
            and the input schema is left untouched.
        """
        if translator._LOCALE == DEFAULT_LOCALE:
            return schema

        translations = translator.load(
            schema.get(_lab_i18n_config, {}).get("domain", DEFAULT_DOMAIN)
        )

        # The translation is done in place at every nesting level, so a
        # shallow copy would leak the translated strings into the nested
        # dictionaries of the caller's schema.
        new_schema = copy.deepcopy(schema)
        translator._translate_schema_strings(translations, new_schema)

        return new_schema
