"""
Notes
-----
Code written using below textbook as a reference.
Results are checked against the expected outcomes in the text book.

Properties:
Hyndman, Rob J., and George Athanasopoulos. Forecasting: principles and
practice. OTexts, 2014.

Author: Terence L van Zyl
Modified: Kevin Sheppard
"""
from statsmodels.compat.pandas import deprecate_kwarg

import contextlib
from typing import Any
from collections.abc import Hashable, Sequence
import warnings

import numpy as np
import pandas as pd
from scipy.optimize import basinhopping, least_squares, minimize
from scipy.special import inv_boxcox
from scipy.stats import boxcox

from statsmodels.tools.validation import (
    array_like,
    bool_like,
    dict_like,
    float_like,
    int_like,
    string_like,
)
from statsmodels.tsa.base.tsa_model import TimeSeriesModel
from statsmodels.tsa.exponential_smoothing.ets import (
    _initialization_heuristic,
    _initialization_simple,
)
from statsmodels.tsa.holtwinters import (
    _exponential_smoothers as smoothers,
    _smoothers as py_smoothers,
)
from statsmodels.tsa.holtwinters._exponential_smoothers import HoltWintersArgs
from statsmodels.tsa.holtwinters._smoothers import (
    to_restricted,
    to_unrestricted,
)
from statsmodels.tsa.holtwinters.results import (
    HoltWintersResults,
    HoltWintersResultsWrapper,
)
from statsmodels.tsa.tsatools import freq_to_period

SMOOTHERS = {
    ("mul", "add"): smoothers.holt_win_add_mul_dam,
    ("mul", "mul"): smoothers.holt_win_mul_mul_dam,
    ("mul", None): smoothers.holt_win__mul,
    ("add", "add"): smoothers.holt_win_add_add_dam,
    ("add", "mul"): smoothers.holt_win_mul_add_dam,
    ("add", None): smoothers.holt_win__add,
    (None, "add"): smoothers.holt_add_dam,
    (None, "mul"): smoothers.holt_mul_dam,
    (None, None): smoothers.holt__,
}

PY_SMOOTHERS = {
    ("mul", "add"): py_smoothers.holt_win_add_mul_dam,
    ("mul", "mul"): py_smoothers.holt_win_mul_mul_dam,
    ("mul", None): py_smoothers.holt_win__mul,
    ("add", "add"): py_smoothers.holt_win_add_add_dam,
    ("add", "mul"): py_smoothers.holt_win_mul_add_dam,
    ("add", None): py_smoothers.holt_win__add,
    (None, "add"): py_smoothers.holt_add_dam,
    (None, "mul"): py_smoothers.holt_mul_dam,
    (None, None): py_smoothers.holt__,
}


def opt_wrapper(func):
    def f(*args, **kwargs):
        err = func(*args, **kwargs)
        if isinstance(err, np.ndarray):
            return err.T @ err
        return err

    return f


class _OptConfig:
    alpha: float
    beta: float
    phi: float
    gamma: float
    level: float
    trend: float
    seasonal: np.ndarray
    y: np.ndarray
    params: np.ndarray
    mask: np.ndarray
    mle_retvals: Any

    def unpack_parameters(self, params) -> "_OptConfig":
        self.alpha = params[0]
        self.beta = params[1]
        self.gamma = params[2]
        self.level = params[3]
        self.trend = params[4]
        self.phi = params[5]
        self.seasonal = params[6:]

        return self


class ExponentialSmoothing(TimeSeriesModel):
    """
    Holt Winter's Exponential Smoothing

    Parameters
    ----------
    endog : array_like
        The time series to model.
    trend : {"add", "mul", "additive", "multiplicative", None}, optional
        Type of trend component.
    damped_trend : bool, optional
        Should the trend component be damped.
    seasonal : {"add", "mul", "additive", "multiplicative", None}, optional
        Type of seasonal component.
    seasonal_periods : int, optional
        The number of periods in a complete seasonal cycle, e.g., 4 for
        quarterly data or 7 for daily data with a weekly cycle.
    initialization_method : str, optional
        Method for initialize the recursions. One of:

        * None
        * 'estimated'
        * 'heuristic'
        * 'legacy-heuristic'
        * 'known'

        None defaults to the pre-0.12 behavior where initial values
        are passed as part of ``fit``. If any of the other values are
        passed, then the initial values must also be set when constructing
        the model. If 'known' initialization is used, then `initial_level`
        must be passed, as well as `initial_trend` and `initial_seasonal` if
        applicable. Default is 'estimated'. "legacy-heuristic" uses the same
        values that were used in statsmodels 0.11 and earlier.
    initial_level : float, optional
        The initial level component. Required if estimation method is "known".
        If set using either "estimated" or "heuristic" this value is used.
        This allows one or more of the initial values to be set while
        deferring to the heuristic for others or estimating the unset
        parameters.
    initial_trend : float, optional
        The initial trend component. Required if estimation method is "known".
        If set using either "estimated" or "heuristic" this value is used.
        This allows one or more of the initial values to be set while
        deferring to the heuristic for others or estimating the unset
        parameters.
    initial_seasonal : array_like, optional
        The initial seasonal component. An array of length `seasonal`
        or length `seasonal - 1` (in which case the last initial value
        is computed to make the average effect zero). Only used if
        initialization is 'known'. Required if estimation method is "known".
        If set using either "estimated" or "heuristic" this value is used.
        This allows one or more of the initial values to be set while
        deferring to the heuristic for others or estimating the unset
        parameters.
    use_boxcox : {True, False, 'log', float}, optional
        Should the Box-Cox transform be applied to the data first? If 'log'
        then apply the log. If float then use the value as lambda.
    bounds : dict[str, tuple[float, float]], optional
        An dictionary containing bounds for the parameters in the model,
        excluding the initial values if estimated. The keys of the dictionary
        are the variable names, e.g., smoothing_level or initial_slope.
        The initial seasonal variables are labeled initial_seasonal.<j>
        for j=0,...,m-1 where m is the number of period in a full season.
        Use None to indicate a non-binding constraint, e.g., (0, None)
        constrains a parameter to be non-negative.
    dates : array_like of datetime, optional
        An array-like object of datetime objects. If a Pandas object is given
        for endog, it is assumed to have a DateIndex.
    freq : str, optional
        The frequency of the time-series. A Pandas offset or 'B', 'D', 'W',
        'M', 'A', or 'Q'. This is optional if dates are given.
    missing : str
        Available options are 'none', 'drop', and 'raise'. If 'none', no nan
        checking is done. If 'drop', any observations with nans are dropped.
        If 'raise', an error is raised. Default is 'none'.

    Notes
    -----
    This is a full implementation of the holt winters exponential smoothing as
    per [1]_. This includes all the unstable methods as well as the stable
    methods. The implementation of the library covers the functionality of the
    R library as much as possible whilst still being Pythonic.

    See the notebook `Exponential Smoothing
    <../examples/notebooks/generated/exponential_smoothing.html>`__
    for an overview.

    References
    ----------
    .. [1] Hyndman, Rob J., and George Athanasopoulos. Forecasting: principles
        and practice. OTexts, 2014.
    """

    @deprecate_kwarg("damped", "damped_trend")
    def __init__(
        self,
        endog,
        trend=None,
        damped_trend=False,
        seasonal=None,
        *,
        seasonal_periods=None,
        initialization_method="estimated",
        initial_level=None,
        initial_trend=None,
        initial_seasonal=None,
        use_boxcox=False,
        bounds=None,
        dates=None,
        freq=None,
        missing="none",
    ):
        super().__init__(endog, None, dates, freq, missing=missing)
        self._y = self._data = array_like(
            endog, "endog", ndim=1, contiguous=True, order="C"
        )
        options = ("add", "mul", "additive", "multiplicative")
        trend = string_like(trend, "trend", options=options, optional=True)
        if trend in ["additive", "multiplicative"]:
            trend = {"additive": "add", "multiplicative": "mul"}[trend]
        self.trend = trend
        self.damped_trend = bool_like(damped_trend, "damped_trend")
        seasonal = string_like(
            seasonal, "seasonal", options=options, optional=True
        )
        if seasonal in ["additive", "multiplicative"]:
            seasonal = {"additive": "add", "multiplicative": "mul"}[seasonal]
        self.seasonal = seasonal
        self.has_trend = trend in ["mul", "add"]
        self.has_seasonal = seasonal in ["mul", "add"]
        if (self.trend == "mul" or self.seasonal == "mul") and not np.all(
            self._data > 0.0
        ):
            raise ValueError(
                "endog must be strictly positive when using"
                "multiplicative trend or seasonal components."
            )
        if self.damped_trend and not self.has_trend:
            raise ValueError("Can only dampen the trend component")
        if self.has_seasonal:
            self.seasonal_periods = int_like(
                seasonal_periods, "seasonal_periods", optional=True
            )
            if seasonal_periods is None:
                try:
                    self.seasonal_periods = freq_to_period(self._index_freq)
                except Exception:
                    raise ValueError(
                        "seasonal_periods has not been provided and index "
                        "does not have a known freq. You must provide "
                        "seasonal_periods"
                    )
            if self.seasonal_periods <= 1:
                raise ValueError("seasonal_periods must be larger than 1.")
            assert self.seasonal_periods is not None
        else:
            self.seasonal_periods = 0
        self.nobs = len(self.endog)
        options = ("known", "estimated", "heuristic", "legacy-heuristic")
        self._initialization_method = string_like(
            initialization_method,
            "initialization_method",
            optional=False,
            options=options,
        )
        self._initial_level = float_like(
            initial_level, "initial_level", optional=True
        )
        self._initial_trend = float_like(
            initial_trend, "initial_trend", optional=True
        )
        self._initial_seasonal = array_like(
            initial_seasonal, "initial_seasonal", optional=True
        )
        estimated = self._initialization_method == "estimated"
        self._estimate_level = estimated
        self._estimate_trend = estimated and self.trend is not None
        self._estimate_seasonal = estimated and self.seasonal is not None
        self._bounds = self._check_bounds(bounds)
        self._use_boxcox = use_boxcox
        self._lambda = np.nan
        self._y = self._boxcox()
        self._initialize()
        self._fixed_parameters = {}

    def _check_bounds(self, bounds):
        bounds = dict_like(bounds, "bounds", optional=True)
        if bounds is None:
            return
        msg = (
            "bounds must be a dictionary of 2-element tuples of the form"
            " (lb, ub) where lb < ub, lb>=0 and ub<=1"
        )
        variables = self._ordered_names()
        for key in bounds:
            if key not in variables:
                supported = ", ".join(variables[:-1])
                supported += ", and " + variables[-1]
                raise KeyError(
                    f"{key} does not match the list of supported variables "
                    f"names: {supported}."
                )
            bound = bounds[key]
            if not isinstance(bound, tuple):
                raise TypeError(msg)
            lb = bound[0] if bound[0] is not None else -np.inf
            ub = bound[1] if bound[1] is not None else np.inf
            if len(bound) != 2 or lb >= ub:
                raise ValueError(msg)
            if ("smoothing" in key or "damp" in key) and (
                bound[0] < 0.0 or bound[1] > 1.0
            ):
                raise ValueError(
                    f"{key} must have a lower bound >= 0.0 and <= 1.0"
                )
        return bounds

    def _boxcox(self):
        if self._use_boxcox is None or self._use_boxcox is False:
            self._lambda = np.nan
            return self._y
        if self._use_boxcox is True:
            y, self._lambda = boxcox(self._y)
        elif isinstance(self._use_boxcox, (int, float)):
            self._lambda = float(self._use_boxcox)
            y = boxcox(self._y, self._use_boxcox)
        else:
            raise TypeError("use_boxcox must be True, False or a float.")
        return y

    @contextlib.contextmanager
    def fix_params(self, values):
        """
        Temporarily fix parameters for estimation.

        Parameters
        ----------
        values : dict
            Values to fix. The key is the parameter name and the value is the
            fixed value.

        Yields
        ------
        None
            No value returned.

        Examples
        --------
        >>> from statsmodels.datasets.macrodata import load_pandas
        >>> data = load_pandas()
        >>> import statsmodels.tsa.api as tsa
        >>> mod = tsa.ExponentialSmoothing(data.data.realcons, trend="add",
        ...                                initialization_method="estimated")
        >>> with mod.fix_params({"smoothing_level": 0.2}):
        ...     mod.fit()
        """
        values = dict_like(values, "values")
        valid_keys = ("smoothing_level",)
        if self.has_trend:
            valid_keys += ("smoothing_trend",)
        if self.has_seasonal:
            valid_keys += ("smoothing_seasonal",)
            m = self.seasonal_periods
            valid_keys += tuple([f"initial_seasonal.{i}" for i in range(m)])
        if self.damped_trend:
            valid_keys += ("damping_trend",)
        if self._initialization_method in ("estimated", None):
            extra_keys = [
                key.replace("smoothing_", "initial_")
                for key in valid_keys
                if "smoothing_" in key
            ]
            valid_keys += tuple(extra_keys)

        for key in values:
            if key not in valid_keys:
                valid = ", ".join(valid_keys[:-1]) + ", and " + valid_keys[-1]
                raise KeyError(
                    f"{key} if not allowed. Only {valid} are supported in "
                    "this specification."
                )

        if "smoothing_level" in values:
            alpha = values["smoothing_level"]
            if alpha <= 0.0:
                raise ValueError("smoothing_level must be in (0, 1)")
            beta = values.get("smoothing_trend", 0.0)
            if beta > alpha:
                raise ValueError("smoothing_trend must be <= smoothing_level")
            gamma = values.get("smoothing_seasonal", 0.0)
            if gamma > 1 - alpha:
                raise ValueError(
                    "smoothing_seasonal must be <= 1 - smoothing_level"
                )

        try:
            self._fixed_parameters = values
            yield
        finally:
            self._fixed_parameters = {}

    def _initialize(self):
        if self._initialization_method == "known":
            return self._initialize_known()
        msg = (
            f"initialization method is {self._initialization_method} but "
            "initial_{0} has been set."
        )
        if self._initial_level is not None:
            raise ValueError(msg.format("level"))
        if self._initial_trend is not None:
            raise ValueError(msg.format("trend"))
        if self._initial_seasonal is not None:
            raise ValueError(msg.format("seasonal"))
        if self._initialization_method == "legacy-heuristic":
            return self._initialize_legacy()
        elif self._initialization_method == "heuristic":
            return self._initialize_heuristic()
        elif self._initialization_method == "estimated":
            if self.nobs < 10 + 2 * (self.seasonal_periods // 2):
                return self._initialize_simple()
            else:
                return self._initialize_heuristic()

    def _initialize_simple(self):
        trend = self.trend if self.has_trend else False
        seasonal = self.seasonal if self.has_seasonal else False
        lvl, trend, seas = _initialization_simple(
            self._y, trend, seasonal, self.seasonal_periods
        )
        self._initial_level = lvl
        self._initial_trend = trend
        self._initial_seasonal = seas

    def _initialize_heuristic(self):
        trend = self.trend if self.has_trend else False
        seasonal = self.seasonal if self.has_seasonal else False
        lvl, trend, seas = _initialization_heuristic(
            self._y, trend, seasonal, self.seasonal_periods
        )
        self._initial_level = lvl
        self._initial_trend = trend
        self._initial_seasonal = seas

    def _initialize_legacy(self):
        lvl, trend, seasonal = self.initial_values(force=True)
        self._initial_level = lvl
        self._initial_trend = trend
        self._initial_seasonal = seasonal

    def _initialize_known(self):
        msg = "initialization is 'known' but initial_{0} not given"
        if self._initial_level is None:
            raise ValueError(msg.format("level"))
        excess = "initial_{0} set but model has no {0} component"
        if self.has_trend and self._initial_trend is None:
            raise ValueError(msg.format("trend"))
        elif not self.has_trend and self._initial_trend is not None:
            raise ValueError(excess.format("trend"))
        if self.has_seasonal and self._initial_seasonal is None:
            raise ValueError(msg.format("seasonal"))
        elif not self.has_seasonal and self._initial_seasonal is not None:
            raise ValueError(excess.format("seasonal"))

    def predict(self, params, start=None, end=None):
        """
        In-sample and out-of-sample prediction.

        Parameters
        ----------
        params : ndarray
            The fitted model parameters.
        start : int, str, or datetime
            Zero-indexed observation number at which to start forecasting, ie.,
            the first forecast is start. Can also be a date string to
            parse or a datetime type.
        end : int, str, or datetime
            Zero-indexed observation number at which to end forecasting, ie.,
            the first forecast is start. Can also be a date string to
            parse or a datetime type.

        Returns
        -------
        ndarray
            The predicted values.
        """
        if start is None:
            freq = getattr(self._index, "freq", 1)
            if isinstance(freq, int):
                start = self._index.shape[0]
            else:
                start = self._index[-1] + freq
        start, end, out_of_sample, _ = self._get_prediction_index(
            start=start, end=end
        )
        if out_of_sample > 0:
            res = self._predict(h=out_of_sample, **params)
        else:
            res = self._predict(h=0, **params)
        return res.fittedfcast[start : end + out_of_sample + 1]

    def _enforce_bounds(self, p, sel, lb, ub):
        initial_p = p[sel]

        # Ensure strictly inbounds
        loc = initial_p <= lb
        upper = ub[loc].copy()
        upper[~np.isfinite(upper)] = 100.0
        eps = 1e-4
        initial_p[loc] = lb[loc] + eps * (upper - lb[loc])

        loc = initial_p >= ub
        lower = lb[loc].copy()
        lower[~np.isfinite(lower)] = -100.0
        eps = 1e-4
        initial_p[loc] = ub[loc] - eps * (ub[loc] - lower)

        return initial_p

    @staticmethod
    def _check_blocked_keywords(
        d: dict, keys: Sequence[Hashable], name="kwargs"
    ):
        for key in keys:
            if key in d:
                raise ValueError(f"{name} must not contain '{key}'")

    def _check_bound_feasibility(self, bounds):
        if bounds[1][0] > bounds[0][1]:
            raise ValueError(
                "The bounds for smoothing_trend and smoothing_level are "
                "incompatible since smoothing_trend <= smoothing_level."
            )
        if bounds[2][0] > (1 - bounds[0][1]):
            raise ValueError(
                "The bounds for smoothing_seasonal and smoothing_level "
                "are incompatible since smoothing_seasonal <= "
                "1 - smoothing_level."
            )

    @staticmethod
    def _setup_brute(sel, bounds, alpha):
        # More points when fewer parameters
        ns = 87 // sel[:3].sum()

        if not sel[0]:
            # Easy case since no cross-constraints
            nparams = int(sel[1]) + int(sel[2])
            args = []
            for i in range(1, 3):
                if sel[i]:
                    bound = bounds[i]
                    step = bound[1] - bound[0]
                    lb = bound[0] + 0.005 * step
                    if i == 1:
                        ub = min(bound[1], alpha) - 0.005 * step
                    else:
                        ub = min(bound[1], 1 - alpha) - 0.005 * step
                    args.append(np.linspace(lb, ub, ns))
            points = np.stack(np.meshgrid(*args))
            points = points.reshape((nparams, -1)).T
            return np.ascontiguousarray(points)

        bound = bounds[0]
        step = 0.005 * (bound[1] - bound[0])
        points = np.linspace(bound[0] + step, bound[1] - step, ns)
        if not sel[1] and not sel[2]:
            return points[:, None]

        combined = []
        b_bounds = bounds[1]
        g_bounds = bounds[2]
        if sel[1] and sel[2]:
            for a in points:
                b_lb = b_bounds[0]
                b_ub = min(b_bounds[1], a)
                g_lb = g_bounds[0]
                g_ub = min(g_bounds[1], 1 - a)
                if b_lb > b_ub or g_lb > g_ub:
                    # infeasible point
                    continue
                nb = int(np.ceil(ns * np.sqrt(a)))
                ng = int(np.ceil(ns * np.sqrt(1 - a)))
                b = np.linspace(b_lb, b_ub, nb)
                g = np.linspace(g_lb, g_ub, ng)
                both = np.stack(np.meshgrid(b, g)).reshape(2, -1).T
                final = np.empty((both.shape[0], 3))
                final[:, 0] = a
                final[:, 1:] = both
                combined.append(final)
        elif sel[1]:
            for a in points:
                b_lb = b_bounds[0]
                b_ub = min(b_bounds[1], a)
                if b_lb > b_ub:
                    # infeasible point
                    continue
                nb = int(np.ceil(ns * np.sqrt(a)))
                final = np.empty((nb, 2))
                final[:, 0] = a
                final[:, 1] = np.linspace(b_lb, b_ub, nb)
                combined.append(final)
        else:  # sel[2]
            for a in points:
                g_lb = g_bounds[0]
                g_ub = min(g_bounds[1], 1 - a)
                if g_lb > g_ub:
                    # infeasible point
                    continue
                ng = int(np.ceil(ns * np.sqrt(1 - a)))
                final = np.empty((ng, 2))
                final[:, 1] = np.linspace(g_lb, g_ub, ng)
                final[:, 0] = a
                combined.append(final)

        return np.vstack(combined)

    def _ordered_names(self):
        names = (
            "smoothing_level",
            "smoothing_trend",
            "smoothing_seasonal",
            "initial_level",
            "initial_trend",
            "damping_trend",
        )
        m = self.seasonal_periods
        names += tuple([f"initial_seasonal.{i}" for i in range(m)])
        return names

    def _update_for_fixed(self, sel, alpha, beta, gamma, phi, l0, b0, s0):
        if self._fixed_parameters:
            fixed = self._fixed_parameters
            names = self._ordered_names()
            not_fixed = np.array([name not in fixed for name in names])
            if (~sel[~not_fixed]).any():
                invalid = []
                for name, s, nf in zip(names, sel, not_fixed):
                    if not s and not nf:
                        invalid.append(name)
                invalid_names = ", ".join(invalid)
                raise ValueError(
                    "Cannot fix a parameter that is not being "
                    f"estimated: {invalid_names}"
                )

            sel &= not_fixed
            alpha = fixed.get("smoothing_level", alpha)
            beta = fixed.get("smoothing_trend", beta)
            gamma = fixed.get("smoothing_seasonal", gamma)
            phi = fixed.get("damping_trend", phi)
            l0 = fixed.get("initial_level", l0)
            b0 = fixed.get("initial_trend", b0)
            for i in range(self.seasonal_periods):
                s0[i] = fixed.get(f"initial_seasonal.{i}", s0[i])
        return sel, alpha, beta, gamma, phi, l0, b0, s0

    def _construct_bounds(self):
        trend_lb = 0.0 if self.trend == "mul" else None
        season_lb = 0.0 if self.seasonal == "mul" else None
        lvl_lb = None if trend_lb is None and season_lb is None else 0.0
        bounds = [
            (0.0, 1.0),  # alpha
            (0.0, 1.0),  # beta
            (0.0, 1.0),  # gamma
            (lvl_lb, None),  # level
            (trend_lb, None),  # trend
            (0.8, 0.995),  # phi
        ]
        bounds += [(season_lb, None)] * self.seasonal_periods
        if self._bounds is not None:
            assert isinstance(self._bounds, dict)
            for i, name in enumerate(self._ordered_names()):
                bounds[i] = self._bounds.get(name, bounds[i])
        # Update bounds to account for fixed parameters
        fixed = self._fixed_parameters
        if "smoothing_level" in fixed:
            # Update bounds if fixed alpha
            alpha = fixed["smoothing_level"]
            # beta <= alpha
            if bounds[1][1] > alpha:
                bounds[1] = (bounds[1][0], alpha)
            # gamma <= 1 - alpha
            if bounds[2][1] > (1 - alpha):
                bounds[2] = (bounds[2][0], 1 - alpha)
            # gamma <= 1 - alpha
        if "smoothing_trend" in fixed:
            # beta <= alpha
            beta = fixed["smoothing_trend"]
            bounds[0] = (max(beta, bounds[0][0]), bounds[0][1])
        if "smoothing_seasonal" in fixed:
            gamma = fixed["smoothing_seasonal"]
            # gamma <= 1 - alpha => alpha <= 1 - gamma
            bounds[0] = (bounds[0][0], min(1 - gamma, bounds[0][1]))
        # Ensure bounds are feasible
        for i, name in enumerate(self._ordered_names()):
            lb = bounds[i][0] if bounds[i][0] is not None else -np.inf
            ub = bounds[i][1] if bounds[i][1] is not None else np.inf
            if lb >= ub:
                raise ValueError(
                    "After adjusting for user-provided bounds fixed values, "
                    f"the resulting set of bounds for {name}, {bounds[i]}, "
                    "are infeasible."
                )
        self._check_bound_feasibility(bounds)
        return bounds

    def _get_starting_values(
        self,
        params,
        start_params,
        use_brute,
        sel,
        hw_args,
        bounds,
        alpha,
        func,
    ):
        if start_params is None and use_brute and np.any(sel[:3]):
            # Have a quick look in the region for a good starting place for
            # alpha, beta & gamma using fixed values for initial
            m = self.seasonal_periods
            sv_sel = np.array([False] * (6 + m))
            sv_sel[:3] = True
            sv_sel &= sel
            hw_args.xi = sv_sel.astype(np.int64)
            hw_args.transform = False
            # Setup the grid points, respecting constraints
            points = self._setup_brute(sv_sel, bounds, alpha)
            opt = opt_wrapper(func)
            best_val = np.inf
            best_params = points[0]
            for point in points:
                val = opt(point, hw_args)
                if val < best_val:
                    best_params = point
                    best_val = val
            params[sv_sel] = best_params
        elif start_params is not None:
            if len(start_params) != sel.sum():
                msg = "start_params must have {0} values but has {1}."
                nxi, nsp = len(sel), len(start_params)
                raise ValueError(msg.format(nxi, nsp))
            params[sel] = start_params
        return params

    def _optimize_parameters(
        self, data: _OptConfig, use_brute, method, kwargs
    ) -> _OptConfig:
        # Prepare starting values
        alpha = data.alpha
        beta = data.beta
        phi = data.phi
        gamma = data.gamma
        y = data.y
        start_params = data.params

        has_seasonal = self.has_seasonal
        has_trend = self.has_trend
        trend = self.trend
        seasonal = self.seasonal
        damped_trend = self.damped_trend

        m = self.seasonal_periods
        params = np.zeros(6 + m)
        l0, b0, s0 = self.initial_values(
            initial_level=data.level, initial_trend=data.trend
        )

        init_alpha = alpha if alpha is not None else 0.5 / max(m, 1)
        init_beta = beta
        if beta is None and has_trend:
            init_beta = 0.1 * init_alpha
        init_gamma = gamma
        if has_seasonal and gamma is None:
            init_gamma = 0.05 * (1 - init_alpha)
        init_phi = phi if phi is not None else 0.99
        # Selection of parameters to optimize
        sel = np.array(
            [
                alpha is None,
                has_trend and beta is None,
                has_seasonal and gamma is None,
                self._estimate_level,
                self._estimate_trend,
                damped_trend and phi is None,
            ]
            + [has_seasonal and self._estimate_seasonal] * m,
        )
        (
            sel,
            init_alpha,
            init_beta,
            init_gamma,
            init_phi,
            l0,
            b0,
            s0,
        ) = self._update_for_fixed(
            sel, init_alpha, init_beta, init_gamma, init_phi, l0, b0, s0
        )

        func = SMOOTHERS[(seasonal, trend)]
        params[:6] = [init_alpha, init_beta, init_gamma, l0, b0, init_phi]
        if m:
            params[-m:] = s0
        if not np.any(sel):
            from statsmodels.tools.sm_exceptions import EstimationWarning

            message = (
                "Model has no free parameters to estimate. Set "
                "optimized=False to suppress this warning"
            )
            warnings.warn(message, EstimationWarning, stacklevel=3)
            data = data.unpack_parameters(params)
            data.params = params
            data.mask = sel

            return data
        orig_bounds = self._construct_bounds()

        bounds = np.array(orig_bounds[:3], dtype=float)
        hw_args = HoltWintersArgs(
            sel.astype(np.int64), params, bounds, y, m, self.nobs
        )
        params = self._get_starting_values(
            params,
            start_params,
            use_brute,
            sel,
            hw_args,
            bounds,
            init_alpha,
            func,
        )

        # We always use [0, 1] for a, b and g and handle transform inside
        mod_bounds = [(0, 1)] * 3 + orig_bounds[3:]
        relevant_bounds = [bnd for bnd, flag in zip(mod_bounds, sel) if flag]
        bounds = np.array(relevant_bounds, dtype=float)
        lb, ub = bounds.T
        lb[np.isnan(lb)] = -np.inf
        ub[np.isnan(ub)] = np.inf
        hw_args.xi = sel.astype(np.int64)

        # Ensure strictly inbounds
        initial_p = self._enforce_bounds(params, sel, lb, ub)
        # Transform to unrestricted space
        params[sel] = initial_p
        params[:3] = to_unrestricted(params, sel, hw_args.bounds)
        initial_p = params[sel]
        # Ensure parameters are transformed internally
        hw_args.transform = True
        if method in ("least_squares", "ls"):
            # Least squares uses a different format for bounds
            ls_bounds = lb, ub
            self._check_blocked_keywords(kwargs, ("args", "bounds"))
            res = least_squares(
                func, initial_p, bounds=ls_bounds, args=(hw_args,), **kwargs
            )
            success = res.success
        elif method in ("basinhopping", "bh"):
            # Take a deeper look in the local minimum we are in to find the
            # best solution to parameters, maybe hop around to try escape the
            # local minimum we may be in.
            minimizer_kwargs = {"args": (hw_args,), "bounds": relevant_bounds}
            kwargs = kwargs.copy()
            if "minimizer_kwargs" in kwargs:
                self._check_blocked_keywords(
                    kwargs["minimizer_kwargs"],
                    ("args", "bounds"),
                    name="kwargs['minimizer_kwargs']",
                )
                minimizer_kwargs.update(kwargs["minimizer_kwargs"])
                del kwargs["minimizer_kwargs"]
            default_kwargs = {
                "minimizer_kwargs": minimizer_kwargs,
                "stepsize": 0.01,
            }
            default_kwargs.update(kwargs)
            obj = opt_wrapper(func)
            res = basinhopping(obj, initial_p, **default_kwargs)
            success = res.lowest_optimization_result.success
        else:
            obj = opt_wrapper(func)
            self._check_blocked_keywords(kwargs, ("args", "bounds", "method"))
            res = minimize(
                obj,
                initial_p,
                args=(hw_args,),
                bounds=relevant_bounds,
                method=method,
                **kwargs,
            )
            success = res.success
        # finally transform to restricted space
        params[sel] = res.x
        params[:3] = to_restricted(params, sel, hw_args.bounds)
        res.x = params[sel]

        if not success:
            from statsmodels.tools.sm_exceptions import ConvergenceWarning

            warnings.warn(
                "Optimization failed to converge. Check mle_retvals.",
                ConvergenceWarning,
            )
        params[sel] = res.x

        data.unpack_parameters(params)
        data.params = params
        data.mask = sel
        data.mle_retvals = res

        return data

    @deprecate_kwarg("smoothing_slope", "smoothing_trend")
    @deprecate_kwarg("initial_slope", "initial_trend")
    @deprecate_kwarg("damping_slope", "damping_trend")
    def fit(
        self,
        smoothing_level=None,
        smoothing_trend=None,
        smoothing_seasonal=None,
        damping_trend=None,
        *,
        optimized=True,
        remove_bias=False,
        start_params=None,
        method=None,
        minimize_kwargs=None,
        use_brute=True,
        use_boxcox=None,
        use_basinhopping=None,
        initial_level=None,
        initial_trend=None,
    ):
        """
        Fit the model

        Parameters
        ----------
        smoothing_level : float, optional
            The alpha value of the simple exponential smoothing, if the value
            is set then this value will be used as the value.
        smoothing_trend :  float, optional
            The beta value of the Holt's trend method, if the value is
            set then this value will be used as the value.
        smoothing_seasonal : float, optional
            The gamma value of the holt winters seasonal method, if the value
            is set then this value will be used as the value.
        damping_trend : float, optional
            The phi value of the damped method, if the value is
            set then this value will be used as the value.
        optimized : bool, optional
            Estimate model parameters by maximizing the log-likelihood.
        remove_bias : bool, optional
            Remove bias from forecast values and fitted values by enforcing
            that the average residual is equal to zero.
        start_params : array_like, optional
            Starting values to used when optimizing the fit.  If not provided,
            starting values are determined using a combination of grid search
            and reasonable values based on the initial values of the data. See
            the notes for the structure of the model parameters.
        method : str, default "L-BFGS-B"
            The minimizer used. Valid options are "L-BFGS-B" , "TNC",
            "SLSQP" (default), "Powell", "trust-constr", "basinhopping" (also
            "bh") and "least_squares" (also "ls"). basinhopping tries multiple
            starting values in an attempt to find a global minimizer in
            non-convex problems, and so is slower than the others.
        minimize_kwargs : dict[str, Any]
            A dictionary of keyword arguments passed to SciPy's minimize
            function if method is one of "L-BFGS-B", "TNC",
            "SLSQP", "Powell", or "trust-constr", or SciPy's basinhopping
            or least_squares functions. The valid keywords are optimizer
            specific. Consult SciPy's documentation for the full set of
            options.
        use_brute : bool, optional
            Search for good starting values using a brute force (grid)
            optimizer. If False, a naive set of starting values is used.
        use_boxcox : {True, False, 'log', float}, optional
            Should the Box-Cox transform be applied to the data first? If 'log'
            then apply the log. If float then use the value as lambda.

            .. deprecated:: 0.12

               Set use_boxcox when constructing the model

        use_basinhopping : bool, optional
            Deprecated. Using Basin Hopping optimizer to find optimal values.
            Use ``method`` instead.

            .. deprecated:: 0.12

               Use ``method`` instead.

        initial_level : float, optional
            Value to use when initializing the fitted level.

            .. deprecated:: 0.12

               Set initial_level when constructing the model

        initial_trend : float, optional
            Value to use when initializing the fitted trend.

            .. deprecated:: 0.12

               Set initial_trend when constructing the model
               or set initialization_method.

        Returns
        -------
        HoltWintersResults
            See statsmodels.tsa.holtwinters.HoltWintersResults.

        Notes
        -----
        This is a full implementation of the holt winters exponential smoothing
        as per [1]. This includes all the unstable methods as well as the
        stable methods. The implementation of the library covers the
        functionality of the R library as much as possible whilst still
        being Pythonic.

        The parameters are ordered

        [alpha, beta, gamma, initial_level, initial_trend, phi]

        which are then followed by m seasonal values if the model contains
        a seasonal smoother. Any parameter not relevant for the model is
        omitted. For example, a model that has a level and a seasonal
        component, but no trend and is not damped, would have starting
        values

        [alpha, gamma, initial_level, s0, s1, ..., s<m-1>]

        where sj is the initial value for seasonal component j.

        References
        ----------
        [1] Hyndman, Rob J., and George Athanasopoulos. Forecasting: principles
            and practice. OTexts, 2014.
        """
        # Variable renames to alpha,beta, etc as this helps with following the
        # mathematical notation in general
        alpha = float_like(smoothing_level, "smoothing_level", True)
        beta = float_like(smoothing_trend, "smoothing_trend", True)
        gamma = float_like(smoothing_seasonal, "smoothing_seasonal", True)
        phi = float_like(damping_trend, "damping_trend", True)
        initial_level = float_like(initial_level, "initial_level", True)
        initial_trend = float_like(initial_trend, "initial_trend", True)
        start_params = array_like(start_params, "start_params", optional=True)
        minimize_kwargs = dict_like(
            minimize_kwargs, "minimize_kwargs", optional=True
        )
        minimize_kwargs = {} if minimize_kwargs is None else minimize_kwargs
        use_basinhopping = bool_like(
            use_basinhopping, "use_basinhopping", optional=True
        )
        supported_methods = ("basinhopping", "bh")
        supported_methods += ("least_squares", "ls")
        supported_methods += (
            "L-BFGS-B",
            "TNC",
            "SLSQP",
            "Powell",
            "trust-constr",
        )
        method = string_like(
            method,
            "method",
            options=supported_methods,
            lower=False,
            optional=True,
        )
        # TODO: Deprecate initial_level and related parameters from fit
        if initial_level is not None or initial_trend is not None:
            raise ValueError(
                "Initial values were set during model construction. These "
                "cannot be changed during fit."
            )
        if use_boxcox is not None:
            raise ValueError(
                "use_boxcox was set at model initialization and cannot "
                "be changed"
            )
        elif self._use_boxcox is None:
            use_boxcox = False
        else:
            use_boxcox = self._use_boxcox

        if use_basinhopping is not None:
            raise ValueError(
                "use_basinhopping is deprecated. Set optimization method "
                "using 'method'."
            )

        data = self._data
        damped = self.damped_trend
        phi = phi if damped else 1.0
        if self._use_boxcox is None:
            if use_boxcox == "log":
                lamda = 0.0
                y = boxcox(data, lamda)
            elif isinstance(use_boxcox, float):
                lamda = use_boxcox
                y = boxcox(data, lamda)
            elif use_boxcox:
                y, lamda = boxcox(data)
                # use_boxcox = lamda
            else:
                y = data.squeeze()
        else:
            y = self._y

        self._y = y
        res = _OptConfig()
        res.alpha = alpha
        res.beta = beta
        res.phi = phi
        res.gamma = gamma
        res.level = initial_level
        res.trend = initial_trend
        res.seasonal = None
        res.y = y
        res.params = start_params
        res.mle_retvals = res.mask = None
        method = "SLSQP" if method is None else method
        if optimized:
            res = self._optimize_parameters(
                res, use_brute, method, minimize_kwargs
            )
        else:
            l0, b0, s0 = self.initial_values(
                initial_level=initial_level, initial_trend=initial_trend
            )
            res.level = l0
            res.trend = b0
            res.seasonal = s0
            if self._fixed_parameters:
                fp = self._fixed_parameters
                res.alpha = fp.get("smoothing_level", res.alpha)
                res.beta = fp.get("smoothing_trend", res.beta)
                res.gamma = fp.get("smoothing_seasonal", res.gamma)
                res.phi = fp.get("damping_trend", res.phi)
                res.level = fp.get("initial_level", res.level)
                res.trend = fp.get("initial_trend", res.trend)
                res.seasonal = fp.get("initial_seasonal", res.seasonal)

        hwfit = self._predict(
            h=0,
            smoothing_level=res.alpha,
            smoothing_trend=res.beta,
            smoothing_seasonal=res.gamma,
            damping_trend=res.phi,
            initial_level=res.level,
            initial_trend=res.trend,
            initial_seasons=res.seasonal,
            use_boxcox=use_boxcox,
            remove_bias=remove_bias,
            is_optimized=res.mask,
        )
        hwfit._results.mle_retvals = res.mle_retvals
        return hwfit

    def initial_values(
        self, initial_level=None, initial_trend=None, force=False
    ):
        """
        Compute initial values used in the exponential smoothing recursions.

        Parameters
        ----------
        initial_level : {float, None}
            The initial value used for the level component.
        initial_trend : {float, None}
            The initial value used for the trend component.
        force : bool
            Force the calculation even if initial values exist.

        Returns
        -------
        initial_level : float
            The initial value used for the level component.
        initial_trend : {float, None}
            The initial value used for the trend component.
        initial_seasons : list
            The initial values used for the seasonal components.

        Notes
        -----
        Convenience function the exposes the values used to initialize the
        recursions. When optimizing parameters these are used as starting
        values.

        Method used to compute the initial value depends on when components
        are included in the model.  In a simple exponential smoothing model
        without trend or a seasonal components, the initial value is set to the
        first observation. When a trend is added, the trend is initialized
        either using y[1]/y[0], if multiplicative, or y[1]-y[0]. When the
        seasonal component is added the initialization adapts to account for
        the modified structure.
        """
        if self._initialization_method is not None and not force:
            return (
                self._initial_level,
                self._initial_trend,
                self._initial_seasonal,
            )
        y = self._y
        trend = self.trend
        seasonal = self.seasonal
        has_seasonal = self.has_seasonal
        has_trend = self.has_trend
        m = self.seasonal_periods
        l0 = initial_level
        b0 = initial_trend
        if has_seasonal:
            l0 = y[np.arange(self.nobs) % m == 0].mean() if l0 is None else l0
            if b0 is None and has_trend:
                # TODO: Fix for short m
                lead, lag = y[m : m + m], y[:m]
                if trend == "mul":
                    b0 = np.exp((np.log(lead.mean()) - np.log(lag.mean())) / m)
                else:
                    b0 = ((lead - lag) / m).mean()
            s0 = list(y[:m] / l0) if seasonal == "mul" else list(y[:m] - l0)
        elif has_trend:
            l0 = y[0] if l0 is None else l0
            if b0 is None:
                b0 = y[1] / y[0] if trend == "mul" else y[1] - y[0]
            s0 = []
        else:
            if l0 is None:
                l0 = y[0]
            b0 = None
            s0 = []

        return l0, b0, s0

    @deprecate_kwarg("smoothing_slope", "smoothing_trend")
    @deprecate_kwarg("damping_slope", "damping_trend")
    def _predict(
        self,
        h=None,
        smoothing_level=None,
        smoothing_trend=None,
        smoothing_seasonal=None,
        initial_level=None,
        initial_trend=None,
        damping_trend=None,
        initial_seasons=None,
        use_boxcox=None,
        lamda=None,
        remove_bias=None,
        is_optimized=None,
    ):
        """
        Helper prediction function

        Parameters
        ----------
        h : int, optional
            The number of time steps to forecast ahead.
        """
        # Variable renames to alpha, beta, etc as this helps with following the
        # mathematical notation in general
        alpha = smoothing_level
        beta = smoothing_trend
        gamma = smoothing_seasonal
        phi = damping_trend

        # Start in sample and out of sample predictions
        data = self.endog
        damped = self.damped_trend
        has_seasonal = self.has_seasonal
        has_trend = self.has_trend
        trend = self.trend
        seasonal = self.seasonal
        m = self.seasonal_periods
        phi = phi if damped else 1.0
        if use_boxcox == "log":
            lamda = 0.0
            y = boxcox(data, 0.0)
        elif isinstance(use_boxcox, float):
            lamda = use_boxcox
            y = boxcox(data, lamda)
        elif use_boxcox:
            y, lamda = boxcox(data)
        else:
            lamda = None
            y = data.squeeze()
            if np.ndim(y) != 1:
                raise NotImplementedError("Only 1 dimensional data supported")
        y_alpha = np.zeros((self.nobs,))
        y_gamma = np.zeros((self.nobs,))
        alphac = 1 - alpha
        y_alpha[:] = alpha * y
        betac = 1 - beta if beta is not None else 0
        gammac = 1 - gamma if gamma is not None else 0
        if has_seasonal:
            y_gamma[:] = gamma * y
        lvls = np.zeros((self.nobs + h + 1,))
        b = np.zeros((self.nobs + h + 1,))
        s = np.zeros((self.nobs + h + m + 1,))
        lvls[0] = initial_level
        b[0] = initial_trend
        s[:m] = initial_seasons
        phi_h = (
            np.cumsum(np.repeat(phi, h + 1) ** np.arange(1, h + 1 + 1))
            if damped
            else np.arange(1, h + 1 + 1)
        )
        trended = {"mul": np.multiply, "add": np.add, None: lambda l, b: l}[
            trend
        ]
        detrend = {"mul": np.divide, "add": np.subtract, None: lambda l, b: 0}[
            trend
        ]
        dampen = {"mul": np.power, "add": np.multiply, None: lambda b, phi: 0}[
            trend
        ]
        nobs = self.nobs
        if seasonal == "mul":
            for i in range(1, nobs + 1):
                lvls[i] = y_alpha[i - 1] / s[i - 1] + (
                    alphac * trended(lvls[i - 1], dampen(b[i - 1], phi))
                )
                if has_trend:
                    b[i] = (beta * detrend(lvls[i], lvls[i - 1])) + (
                        betac * dampen(b[i - 1], phi)
                    )
                s[i + m - 1] = y_gamma[i - 1] / trended(
                    lvls[i - 1], dampen(b[i - 1], phi)
                ) + (gammac * s[i - 1])
            _trend = b[1 : nobs + 1].copy()
            season = s[m : nobs + m].copy()
            lvls[nobs:] = lvls[nobs]
            if has_trend:
                b[:nobs] = dampen(b[:nobs], phi)
                b[nobs:] = dampen(b[nobs], phi_h)
            trend = trended(lvls, b)
            s[nobs + m - 1 :] = [
                s[(nobs - 1) + j % m] for j in range(h + 1 + 1)
            ]
            fitted = trend * s[:-m]
        elif seasonal == "add":
            for i in range(1, nobs + 1):
                lvls[i] = (
                    y_alpha[i - 1]
                    - (alpha * s[i - 1])
                    + (alphac * trended(lvls[i - 1], dampen(b[i - 1], phi)))
                )
                if has_trend:
                    b[i] = (beta * detrend(lvls[i], lvls[i - 1])) + (
                        betac * dampen(b[i - 1], phi)
                    )
                s[i + m - 1] = (
                    y_gamma[i - 1]
                    - (gamma * trended(lvls[i - 1], dampen(b[i - 1], phi)))
                    + (gammac * s[i - 1])
                )
            _trend = b[1 : nobs + 1].copy()
            season = s[m : nobs + m].copy()
            lvls[nobs:] = lvls[nobs]
            if has_trend:
                b[:nobs] = dampen(b[:nobs], phi)
                b[nobs:] = dampen(b[nobs], phi_h)
            trend = trended(lvls, b)
            s[nobs + m - 1 :] = [
                s[(nobs - 1) + j % m] for j in range(h + 1 + 1)
            ]
            fitted = trend + s[:-m]
        else:
            for i in range(1, nobs + 1):
                lvls[i] = y_alpha[i - 1] + (
                    alphac * trended(lvls[i - 1], dampen(b[i - 1], phi))
                )
                if has_trend:
                    b[i] = (beta * detrend(lvls[i], lvls[i - 1])) + (
                        betac * dampen(b[i - 1], phi)
                    )
            _trend = b[1 : nobs + 1].copy()
            season = s[m : nobs + m].copy()
            lvls[nobs:] = lvls[nobs]
            if has_trend:
                b[:nobs] = dampen(b[:nobs], phi)
                b[nobs:] = dampen(b[nobs], phi_h)
            trend = trended(lvls, b)
            fitted = trend
        level = lvls[1 : nobs + 1].copy()
        if use_boxcox or use_boxcox == "log" or isinstance(use_boxcox, float):
            fitted = inv_boxcox(fitted, lamda)
        err = fitted[: -h - 1] - data
        sse = err.T @ err
        # (s0 + gamma) + (b0 + beta) + (l0 + alpha) + phi
        k = m * has_seasonal + 2 * has_trend + 2 + 1 * damped
        aic = self.nobs * np.log(sse / self.nobs) + k * 2
        dof_eff = self.nobs - k - 3
        if dof_eff > 0:
            aicc_penalty = (2 * (k + 2) * (k + 3)) / dof_eff
        else:
            aicc_penalty = np.inf
        aicc = aic + aicc_penalty
        bic = self.nobs * np.log(sse / self.nobs) + k * np.log(self.nobs)
        resid = data - fitted[: -h - 1]
        if remove_bias:
            fitted += resid.mean()
        self.params = {
            "smoothing_level": alpha,
            "smoothing_trend": beta,
            "smoothing_seasonal": gamma,
            "damping_trend": phi if damped else np.nan,
            "initial_level": lvls[0],
            "initial_trend": b[0] / phi if phi > 0 else 0,
            "initial_seasons": s[:m],
            "use_boxcox": use_boxcox,
            "lamda": lamda,
            "remove_bias": remove_bias,
        }

        # Format parameters into a DataFrame
        codes = ["alpha", "beta", "gamma", "l.0", "b.0", "phi"]
        codes += [f"s.{i}" for i in range(m)]
        idx = [
            "smoothing_level",
            "smoothing_trend",
            "smoothing_seasonal",
            "initial_level",
            "initial_trend",
            "damping_trend",
        ]
        idx += [f"initial_seasons.{i}" for i in range(m)]

        formatted = [alpha, beta, gamma, lvls[0], b[0], phi]
        formatted += s[:m].tolist()
        formatted = list(map(lambda v: np.nan if v is None else v, formatted))
        formatted = np.array(formatted)
        if is_optimized is None:
            optimized = np.zeros(len(codes), dtype=bool)
        else:
            optimized = is_optimized.astype(bool)
        included = [True, has_trend, has_seasonal, True, has_trend, damped]
        included += [True] * m
        formatted = pd.DataFrame(
            [[c, f, o] for c, f, o in zip(codes, formatted, optimized)],
            columns=["name", "param", "optimized"],
            index=idx,
        )
        formatted = formatted.loc[included]

        hwfit = HoltWintersResults(
            self,
            self.params,
            fittedfcast=fitted,
            fittedvalues=fitted[: -h - 1],
            fcastvalues=fitted[-h - 1 :],
            sse=sse,
            level=level,
            trend=_trend,
            season=season,
            aic=aic,
            bic=bic,
            aicc=aicc,
            resid=resid,
            k=k,
            params_formatted=formatted,
            optimized=optimized,
        )
        return HoltWintersResultsWrapper(hwfit)


class SimpleExpSmoothing(ExponentialSmoothing):
    """
    Simple Exponential Smoothing

    Parameters
    ----------
    endog : array_like
        The time series to model.
    initialization_method : str, optional
        Method for initialize the recursions. One of:

        * None
        * 'estimated'
        * 'heuristic'
        * 'legacy-heuristic'
        * 'known'

        None defaults to the pre-0.12 behavior where initial values
        are passed as part of ``fit``. If any of the other values are
        passed, then the initial values must also be set when constructing
        the model. If 'known' initialization is used, then `initial_level`
        must be passed, as well as `initial_trend` and `initial_seasonal` if
        applicable. Default is 'estimated'. "legacy-heuristic" uses the same
        values that were used in statsmodels 0.11 and earlier.
    initial_level : float, optional
        The initial level component. Required if estimation method is "known".
        If set using either "estimated" or "heuristic" this value is used.
        This allows one or more of the initial values to be set while
        deferring to the heuristic for others or estimating the unset
        parameters.

    See Also
    --------
    ExponentialSmoothing
        Exponential smoothing with trend and seasonal components.
    Holt
        Exponential smoothing with a trend component.

    Notes
    -----
    This is a full implementation of the simple exponential smoothing as
    per [1]_.  `SimpleExpSmoothing` is a restricted version of
    :class:`ExponentialSmoothing`.

    See the notebook `Exponential Smoothing
    <../examples/notebooks/generated/exponential_smoothing.html>`__
    for an overview.

    References
    ----------
    .. [1] Hyndman, Rob J., and George Athanasopoulos. Forecasting: principles
        and practice. OTexts, 2014.
    """

    def __init__(
        self,
        endog,
        initialization_method=None,  # Future: 'estimated',
        initial_level=None,
    ):
        super().__init__(
            endog,
            initialization_method=initialization_method,
            initial_level=initial_level,
        )

    def fit(
        self,
        smoothing_level=None,
        *,
        optimized=True,
        start_params=None,
        initial_level=None,
        use_brute=True,
        use_boxcox=None,
        remove_bias=False,
        method=None,
        minimize_kwargs=None,
    ):
        """
        Fit the model

        Parameters
        ----------
        smoothing_level : float, optional
            The smoothing_level value of the simple exponential smoothing, if
            the value is set then this value will be used as the value.
        optimized : bool, optional
            Estimate model parameters by maximizing the log-likelihood.
        start_params : ndarray, optional
            Starting values to used when optimizing the fit.  If not provided,
            starting values are determined using a combination of grid search
            and reasonable values based on the initial values of the data.
        initial_level : float, optional
            Value to use when initializing the fitted level.
        use_brute : bool, optional
            Search for good starting values using a brute force (grid)
            optimizer. If False, a naive set of starting values is used.
        use_boxcox : {True, False, 'log', float}, optional
            Should the Box-Cox transform be applied to the data first? If 'log'
            then apply the log. If float then use the value as lambda.
        remove_bias : bool, optional
            Remove bias from forecast values and fitted values by enforcing
            that the average residual is equal to zero.
        method : str, default "L-BFGS-B"
            The minimizer used. Valid options are "L-BFGS-B" (default), "TNC",
            "SLSQP", "Powell", "trust-constr", "basinhopping" (also "bh") and
            "least_squares" (also "ls"). basinhopping tries multiple starting
            values in an attempt to find a global minimizer in non-convex
            problems, and so is slower than the others.
        minimize_kwargs : dict[str, Any]
            A dictionary of keyword arguments passed to SciPy's minimize
            function if method is one of "L-BFGS-B" (default), "TNC",
            "SLSQP", "Powell", or "trust-constr", or SciPy's basinhopping
            or least_squares. The valid keywords are optimizer specific.
            Consult SciPy's documentation for the full set of options.

        Returns
        -------
        HoltWintersResults
            See statsmodels.tsa.holtwinters.HoltWintersResults.

        Notes
        -----
        This is a full implementation of the simple exponential smoothing as
        per [1].

        References
        ----------
        [1] Hyndman, Rob J., and George Athanasopoulos. Forecasting: principles
            and practice. OTexts, 2014.
        """
        return super().fit(
            smoothing_level=smoothing_level,
            optimized=optimized,
            start_params=start_params,
            initial_level=initial_level,
            use_brute=use_brute,
            remove_bias=remove_bias,
            use_boxcox=use_boxcox,
            method=method,
            minimize_kwargs=minimize_kwargs,
        )


class Holt(ExponentialSmoothing):
    """
    Holt's Exponential Smoothing

    Parameters
    ----------
    endog : array_like
        The time series to model.
    exponential : bool, optional
        Type of trend component.
    damped_trend : bool, optional
        Should the trend component be damped.
    initialization_method : str, optional
        Method for initialize the recursions. One of:

        * None
        * 'estimated'
        * 'heuristic'
        * 'legacy-heuristic'
        * 'known'

        None defaults to the pre-0.12 behavior where initial values
        are passed as part of ``fit``. If any of the other values are
        passed, then the initial values must also be set when constructing
        the model. If 'known' initialization is used, then `initial_level`
        must be passed, as well as `initial_trend` and `initial_seasonal` if
        applicable. Default is 'estimated'. "legacy-heuristic" uses the same
        values that were used in statsmodels 0.11 and earlier.
    initial_level : float, optional
        The initial level component. Required if estimation method is "known".
        If set using either "estimated" or "heuristic" this value is used.
        This allows one or more of the initial values to be set while
        deferring to the heuristic for others or estimating the unset
        parameters.
    initial_trend : float, optional
        The initial trend component. Required if estimation method is "known".
        If set using either "estimated" or "heuristic" this value is used.
        This allows one or more of the initial values to be set while
        deferring to the heuristic for others or estimating the unset
        parameters.

    See Also
    --------
    ExponentialSmoothing
        Exponential smoothing with trend and seasonal components.
    SimpleExpSmoothing
        Basic exponential smoothing with only a level component.

    Notes
    -----
    This is a full implementation of the Holt's exponential smoothing as
    per [1]_. `Holt` is a restricted version of :class:`ExponentialSmoothing`.

    See the notebook `Exponential Smoothing
    <../examples/notebooks/generated/exponential_smoothing.html>`__
    for an overview.

    References
    ----------
    .. [1] Hyndman, Rob J., and George Athanasopoulos. Forecasting: principles
        and practice. OTexts, 2014.
    """

    @deprecate_kwarg("damped", "damped_trend")
    def __init__(
        self,
        endog,
        exponential=False,
        damped_trend=False,
        initialization_method=None,  # Future: 'estimated',
        initial_level=None,
        initial_trend=None,
    ):
        trend = "mul" if exponential else "add"
        super().__init__(
            endog,
            trend=trend,
            damped_trend=damped_trend,
            initialization_method=initialization_method,
            initial_level=initial_level,
            initial_trend=initial_trend,
        )

    @deprecate_kwarg("smoothing_slope", "smoothing_trend")
    @deprecate_kwarg("initial_slope", "initial_trend")
    @deprecate_kwarg("damping_slope", "damping_trend")
    def fit(
        self,
        smoothing_level=None,
        smoothing_trend=None,
        *,
        damping_trend=None,
        optimized=True,
        start_params=None,
        initial_level=None,
        initial_trend=None,
        use_brute=True,
        use_boxcox=None,
        remove_bias=False,
        method=None,
        minimize_kwargs=None,
    ):
        """
        Fit the model

        Parameters
        ----------
        smoothing_level : float, optional
            The alpha value of the simple exponential smoothing, if the value
            is set then this value will be used as the value.
        smoothing_trend :  float, optional
            The beta value of the Holt's trend method, if the value is
            set then this value will be used as the value.
        damping_trend : float, optional
            The phi value of the damped method, if the value is
            set then this value will be used as the value.
        optimized : bool, optional
            Estimate model parameters by maximizing the log-likelihood.
        start_params : ndarray, optional
            Starting values to used when optimizing the fit.  If not provided,
            starting values are determined using a combination of grid search
            and reasonable values based on the initial values of the data.
        initial_level : float, optional
            Value to use when initializing the fitted level.

            .. deprecated:: 0.12

               Set initial_level when constructing the model

        initial_trend : float, optional
            Value to use when initializing the fitted trend.

            .. deprecated:: 0.12

               Set initial_trend when constructing the model

        use_brute : bool, optional
            Search for good starting values using a brute force (grid)
            optimizer. If False, a naive set of starting values is used.
        use_boxcox : {True, False, 'log', float}, optional
            Should the Box-Cox transform be applied to the data first? If 'log'
            then apply the log. If float then use the value as lambda.
        remove_bias : bool, optional
            Remove bias from forecast values and fitted values by enforcing
            that the average residual is equal to zero.
        method : str, default "L-BFGS-B"
            The minimizer used. Valid options are "L-BFGS-B" (default), "TNC",
            "SLSQP", "Powell", "trust-constr", "basinhopping" (also "bh") and
            "least_squares" (also "ls"). basinhopping tries multiple starting
            values in an attempt to find a global minimizer in non-convex
            problems, and so is slower than the others.
        minimize_kwargs : dict[str, Any]
            A dictionary of keyword arguments passed to SciPy's minimize
            function if method is one of "L-BFGS-B" (default), "TNC",
            "SLSQP", "Powell", or "trust-constr", or SciPy's basinhopping
            or least_squares. The valid keywords are optimizer specific.
            Consult SciPy's documentation for the full set of options.

        Returns
        -------
        HoltWintersResults
            See statsmodels.tsa.holtwinters.HoltWintersResults.

        Notes
        -----
        This is a full implementation of the Holt's exponential smoothing as
        per [1].

        References
        ----------
        [1] Hyndman, Rob J., and George Athanasopoulos. Forecasting: principles
            and practice. OTexts, 2014.
        """
        return super().fit(
            smoothing_level=smoothing_level,
            smoothing_trend=smoothing_trend,
            damping_trend=damping_trend,
            optimized=optimized,
            start_params=start_params,
            initial_level=initial_level,
            initial_trend=initial_trend,
            use_brute=use_brute,
            use_boxcox=use_boxcox,
            remove_bias=remove_bias,
            method=method,
            minimize_kwargs=minimize_kwargs,
        )
