"""
SARIMAX Model

Author: Chad Fulton
License: Simplified-BSD
"""
from warnings import warn

import numpy as np
import pandas as pd

from statsmodels.compat.pandas import Appender

from statsmodels.tools.tools import Bunch
from statsmodels.tools.data import _is_using_pandas
from statsmodels.tools.decorators import cache_readonly
import statsmodels.base.wrapper as wrap

from statsmodels.tsa.arima.specification import SARIMAXSpecification
from statsmodels.tsa.arima.params import SARIMAXParams
from statsmodels.tsa.tsatools import lagmat

from .initialization import Initialization
from .mlemodel import MLEModel, MLEResults, MLEResultsWrapper
from .tools import (
    companion_matrix, diff, is_invertible, constrain_stationary_univariate,
    unconstrain_stationary_univariate,
    prepare_exog, prepare_trend_spec, prepare_trend_data)


class SARIMAX(MLEModel):
    r"""
    Seasonal AutoRegressive Integrated Moving Average with eXogenous regressors
    model

    Parameters
    ----------
    endog : array_like
        The observed time-series process :math:`y`
    exog : array_like, optional
        Array of exogenous regressors, shaped nobs x k.
    order : iterable or iterable of iterables, optional
        The (p,d,q) order of the model for the number of AR parameters,
        differences, and MA parameters. `d` must be an integer
        indicating the integration order of the process, while
        `p` and `q` may either be integers indicating the AR and MA
        orders (so that all lags up to those orders are included) or else
        iterables giving specific AR and / or MA lags to include. Default is
        an AR(1) model: (1,0,0).
    seasonal_order : iterable, optional
        The (P,D,Q,s) order of the seasonal component of the model for the
        AR parameters, differences, MA parameters, and periodicity.
        `D` must be an integer indicating the integration order of the process,
        while `P` and `Q` may either be integers indicating the AR and MA
        orders (so that all lags up to those orders are included) or else
        iterables giving specific AR and / or MA lags to include. `s` is an
        integer giving the periodicity (number of periods in season), often it
        is 4 for quarterly data or 12 for monthly data. Default is no seasonal
        effect.
    trend : str{'n','c','t','ct'} or iterable, optional
        Parameter controlling the deterministic trend polynomial :math:`A(t)`.
        Can be specified as a string where 'c' indicates a constant (i.e. a
        degree zero component of the trend polynomial), 't' indicates a
        linear trend with time, and 'ct' is both. Can also be specified as an
        iterable defining the non-zero polynomial exponents to include, in
        increasing order. For example, `[1,1,0,1]` denotes
        :math:`a + bt + ct^3`. Default is to not include a trend component.
    measurement_error : bool, optional
        Whether or not to assume the endogenous observations `endog` were
        measured with error. Default is False.
    time_varying_regression : bool, optional
        Used when explanatory variables, `exog`, are provided
        to select whether or not coefficients on the exogenous regressors are
        allowed to vary over time. Default is False.
    mle_regression : bool, optional
        Whether or not to estimate the regression coefficients for the
        exogenous variables as part of maximum likelihood estimation or through
        the Kalman filter (i.e. recursive least squares). If
        `time_varying_regression` is True, this must be set to False. Default
        is True.
    simple_differencing : bool, optional
        Whether or not to use partially conditional maximum likelihood
        estimation. If True, differencing is performed prior to estimation,
        which discards the first :math:`s D + d` initial rows but results in a
        smaller state-space formulation. See the Notes section for important
        details about interpreting results when this option is used. If False,
        the full SARIMAX model is put in state-space form so that all
        datapoints can be used in estimation. Default is False.
    enforce_stationarity : bool, optional
        Whether or not to transform the AR parameters to enforce stationarity
        in the autoregressive component of the model. Default is True.
    enforce_invertibility : bool, optional
        Whether or not to transform the MA parameters to enforce invertibility
        in the moving average component of the model. Default is True.
    hamilton_representation : bool, optional
        Whether or not to use the Hamilton representation of an ARMA process
        (if True) or the Harvey representation (if False). Default is False.
    concentrate_scale : bool, optional
        Whether or not to concentrate the scale (variance of the error term)
        out of the likelihood. This reduces the number of parameters estimated
        by maximum likelihood by one, but standard errors will then not
        be available for the scale parameter.
    trend_offset : int, optional
        The offset at which to start time trend values. Default is 1, so that
        if `trend='t'` the trend is equal to 1, 2, ..., nobs. Typically is only
        set when the model is created by extending a previous dataset.
    use_exact_diffuse : bool, optional
        Whether or not to use exact diffuse initialization for non-stationary
        states. Default is False (in which case approximate diffuse
        initialization is used).
    **kwargs
        Keyword arguments may be used to provide default values for state space
        matrices or for Kalman filtering options. See `Representation`, and
        `KalmanFilter` for more details.

    Attributes
    ----------
    measurement_error : bool
        Whether or not to assume the endogenous
        observations `endog` were measured with error.
    state_error : bool
        Whether or not the transition equation has an error component.
    mle_regression : bool
        Whether or not the regression coefficients for
        the exogenous variables were estimated via maximum
        likelihood estimation.
    state_regression : bool
        Whether or not the regression coefficients for
        the exogenous variables are included as elements
        of the state space and estimated via the Kalman
        filter.
    time_varying_regression : bool
        Whether or not coefficients on the exogenous
        regressors are allowed to vary over time.
    simple_differencing : bool
        Whether or not to use partially conditional maximum likelihood
        estimation.
    enforce_stationarity : bool
        Whether or not to transform the AR parameters
        to enforce stationarity in the autoregressive
        component of the model.
    enforce_invertibility : bool
        Whether or not to transform the MA parameters
        to enforce invertibility in the moving average
        component of the model.
    hamilton_representation : bool
        Whether or not to use the Hamilton representation of an ARMA process.
    trend : str{'n','c','t','ct'} or iterable
        Parameter controlling the deterministic
        trend polynomial :math:`A(t)`. See the class
        parameter documentation for more information.
    polynomial_ar : ndarray
        Array containing autoregressive lag polynomial lags, ordered from
        lowest degree to highest. The polynomial begins with lag 0.
        Initialized with ones, unless a coefficient is constrained to be
        zero (in which case it is zero).
    polynomial_ma : ndarray
        Array containing moving average lag polynomial lags, ordered from
        lowest degree to highest. Initialized with ones, unless a coefficient
        is constrained to be zero (in which case it is zero).
    polynomial_seasonal_ar : ndarray
        Array containing seasonal autoregressive lag
        polynomial lags, ordered from lowest degree
        to highest. Initialized with ones, unless a
        coefficient is constrained to be zero (in which
        case it is zero).
    polynomial_seasonal_ma : ndarray
        Array containing seasonal moving average lag
        polynomial lags, ordered from lowest degree
        to highest. Initialized with ones, unless a
        coefficient is constrained to be zero (in which
        case it is zero).
    polynomial_trend : ndarray
        Array containing trend polynomial coefficients,
        ordered from lowest degree to highest. Initialized
        with ones, unless a coefficient is constrained to be
        zero (in which case it is zero).
    k_ar : int
        Highest autoregressive order in the model, zero-indexed.
    k_ar_params : int
        Number of autoregressive parameters to be estimated.
    k_diff : int
        Order of integration.
    k_ma : int
        Highest moving average order in the model, zero-indexed.
    k_ma_params : int
        Number of moving average parameters to be estimated.
    seasonal_periods : int
        Number of periods in a season.
    k_seasonal_ar : int
        Highest seasonal autoregressive order in the model, zero-indexed.
    k_seasonal_ar_params : int
        Number of seasonal autoregressive parameters to be estimated.
    k_seasonal_diff : int
        Order of seasonal integration.
    k_seasonal_ma : int
        Highest seasonal moving average order in the model, zero-indexed.
    k_seasonal_ma_params : int
        Number of seasonal moving average parameters to be estimated.
    k_trend : int
        Order of the trend polynomial plus one (i.e. the constant polynomial
        would have `k_trend=1`).
    k_exog : int
        Number of exogenous regressors.

    Notes
    -----
    The SARIMA model is specified :math:`(p, d, q) \times (P, D, Q)_s`.

    .. math::

        \phi_p (L) \tilde \phi_P (L^s) \Delta^d \Delta_s^D y_t = A(t) +
            \theta_q (L) \tilde \theta_Q (L^s) \zeta_t

    In terms of a univariate structural model, this can be represented as

    .. math::

        y_t & = u_t + \eta_t \\
        \phi_p (L) \tilde \phi_P (L^s) \Delta^d \Delta_s^D u_t & = A(t) +
            \theta_q (L) \tilde \theta_Q (L^s) \zeta_t

    where :math:`\eta_t` is only applicable in the case of measurement error
    (although it is also used in the case of a pure regression model, i.e. if
    p=q=0).

    In terms of this model, regression with SARIMA errors can be represented
    easily as

    .. math::

        y_t & = \beta_t x_t + u_t \\
        \phi_p (L) \tilde \phi_P (L^s) \Delta^d \Delta_s^D u_t & = A(t) +
            \theta_q (L) \tilde \theta_Q (L^s) \zeta_t

    this model is the one used when exogenous regressors are provided.

    Note that the reduced form lag polynomials will be written as:

    .. math::

        \Phi (L) \equiv \phi_p (L) \tilde \phi_P (L^s) \\
        \Theta (L) \equiv \theta_q (L) \tilde \theta_Q (L^s)

    If `mle_regression` is True, regression coefficients are treated as
    additional parameters to be estimated via maximum likelihood. Otherwise
    they are included as part of the state with a diffuse initialization.
    In this case, however, with approximate diffuse initialization, results
    can be sensitive to the initial variance.

    This class allows two different underlying representations of ARMA models
    as state space models: that of Hamilton and that of Harvey. Both are
    equivalent in the sense that they are analytical representations of the
    ARMA model, but the state vectors of each have different meanings. For
    this reason, maximum likelihood does not result in identical parameter
    estimates and even the same set of parameters will result in different
    loglikelihoods.

    The Harvey representation is convenient because it allows integrating
    differencing into the state vector to allow using all observations for
    estimation.

    In this implementation of differenced models, the Hamilton representation
    is not able to accommodate differencing in the state vector, so
    `simple_differencing` (which performs differencing prior to estimation so
    that the first d + sD observations are lost) must be used.

    Many other packages use the Hamilton representation, so that tests against
    Stata and R require using it along with simple differencing (as Stata
    does).

    If `concentrate_scale = True` is used, then the scale of the model is
    concentrated out of the likelihood. A benefit of this is that the
    dimension of the parameter vector is reduced so that numerical maximization
    of the log-likelihood function may be faster and more stable. If this
    option is used in a model with measurement error, it is important to note
    that the estimated measurement error parameter will be relative to the
    scale, and is named "snr.measurement_error" instead of
    "var.measurement_error". To compute the variance of the measurement error
    in this case one would multiply the `snr.measurement_error` parameter by
    the scale.

    If `simple_differencing = True` is used, then the `endog` and `exog` data
    are differenced prior to putting the model in state-space form. This has
    the same effect as if the user differenced the data prior to constructing
    the model, which has implications for using the results:

    - Forecasts and predictions will be about the *differenced* data, not about
      the original data. (while if `simple_differencing = False` is used, then
      forecasts and predictions will be about the original data).
    - If the original data has an Int64Index, a new RangeIndex will be created
      for the differenced data that starts from one, and forecasts and
      predictions will use this new index.

    Detailed information about state space models can be found in [1]_. Some
    specific references are:

    - Chapter 3.4 describes ARMA and ARIMA models in state space form (using
      the Harvey representation), and gives references for basic seasonal
      models and models with a multiplicative form (for example the airline
      model). It also shows a state space model for a full ARIMA process (this
      is what is done here if `simple_differencing=False`).
    - Chapter 3.6 describes estimating regression effects via the Kalman filter
      (this is performed if `mle_regression` is False), regression with
      time-varying coefficients, and regression with ARMA errors (recall from
      above that if regression effects are present, the model estimated by this
      class is regression with SARIMA errors).
    - Chapter 8.4 describes the application of an ARMA model to an example
      dataset. A replication of this section is available in an example
      IPython notebook in the documentation.

    References
    ----------
    .. [1] Durbin, James, and Siem Jan Koopman. 2012.
       Time Series Analysis by State Space Methods: Second Edition.
       Oxford University Press.
    """

    def __init__(self, endog, exog=None, order=(1, 0, 0),
                 seasonal_order=(0, 0, 0, 0), trend=None,
                 measurement_error=False, time_varying_regression=False,
                 mle_regression=True, simple_differencing=False,
                 enforce_stationarity=True, enforce_invertibility=True,
                 hamilton_representation=False, concentrate_scale=False,
                 trend_offset=1, use_exact_diffuse=False, dates=None,
                 freq=None, missing='none', validate_specification=True,
                 **kwargs):
        """
        Construct the model and set up its state space representation.

        The model arguments are described in the class docstring. `dates`,
        `freq`, `missing` and `validate_specification` are passed to
        `SARIMAXSpecification` together with the data and the orders; the
        remaining `**kwargs` are passed to the parent class constructor.

        Raises
        ------
        ValueError
            If `time_varying_regression` and `mle_regression` are both True,
            or if `hamilton_representation` is True while the model has a
            non-zero differencing order and `simple_differencing` is False.
        """

        # Build the specification and the parameters helper. The enforce_*
        # flags are passed as None here rather than forwarding the constructor
        # arguments; the constructor arguments are stored on the model below.
        # NOTE(review): `dates`, `freq` and `missing` are only given to the
        # specification; they are not forwarded to the parent constructor
        # further down - confirm that this is intended.
        self._spec = SARIMAXSpecification(
            endog, exog=exog, order=order, seasonal_order=seasonal_order,
            trend=trend, enforce_stationarity=None, enforce_invertibility=None,
            concentrate_scale=concentrate_scale, dates=dates, freq=freq,
            missing=missing, validate_specification=validate_specification)
        self._params = SARIMAXParams(self._spec)

        # Save given orders
        # (as reported back by the specification, not the raw arguments)
        order = self._spec.order
        seasonal_order = self._spec.seasonal_order
        self.order = order
        self.seasonal_order = seasonal_order

        # Model parameters
        self.seasonal_periods = seasonal_order[3]
        self.measurement_error = measurement_error
        self.time_varying_regression = time_varying_regression
        self.mle_regression = mle_regression
        self.simple_differencing = simple_differencing
        self.enforce_stationarity = enforce_stationarity
        self.enforce_invertibility = enforce_invertibility
        self.hamilton_representation = hamilton_representation
        self.concentrate_scale = concentrate_scale
        self.use_exact_diffuse = use_exact_diffuse

        # Enforce non-MLE coefficients if time varying coefficients is
        # specified
        if self.time_varying_regression and self.mle_regression:
            raise ValueError('Models with time-varying regression coefficients'
                             ' must integrate the coefficients as part of the'
                             ' state vector, so that `mle_regression` must'
                             ' be set to False.')

        # Lag polynomials
        # Every parameter is set to a placeholder value (-1 for AR terms, 1
        # for MA terms) and the polynomial coefficients are then read back.
        # (presumably the AR polynomials negate their parameters, so that the
        # included lags get a coefficient of one as described in the class
        # docstring - confirm in SARIMAXParams)
        # The underscore-prefixed attributes are separate copies of the
        # public arrays.
        self._params.ar_params = -1
        self.polynomial_ar = self._params.ar_poly.coef
        self._polynomial_ar = self.polynomial_ar.copy()

        self._params.ma_params = 1
        self.polynomial_ma = self._params.ma_poly.coef
        self._polynomial_ma = self.polynomial_ma.copy()

        self._params.seasonal_ar_params = -1
        self.polynomial_seasonal_ar = self._params.seasonal_ar_poly.coef
        self._polynomial_seasonal_ar = self.polynomial_seasonal_ar.copy()

        self._params.seasonal_ma_params = 1
        self.polynomial_seasonal_ma = self._params.seasonal_ma_poly.coef
        self._polynomial_seasonal_ma = self.polynomial_seasonal_ma.copy()

        # Deterministic trend polynomial
        self.trend = trend
        self.trend_offset = trend_offset
        self.polynomial_trend, self.k_trend = prepare_trend_spec(self.trend)
        self._polynomial_trend = self.polynomial_trend.copy()
        self._k_trend = self.k_trend
        # (we internally use _k_trend for mechanics so that the public
        # attribute can be overridden by subclasses)

        # Model orders
        # Note: k_ar, k_ma, k_seasonal_ar, k_seasonal_ma do not include the
        # constant term, so they may be zero.
        # Note: for a typical ARMA(p,q) model, p = k_ar_params = k_ar - 1 and
        # q = k_ma_params = k_ma - 1, although this may not be true for models
        # with arbitrary lag polynomials.
        # NOTE(review): the two notes above appear to disagree (if the
        # constant term is excluded, the `- 1` would not apply) - confirm
        # against the definition of `max_ar_order` / `max_ma_order`.
        self.k_ar = self._spec.max_ar_order
        self.k_ar_params = self._spec.k_ar_params
        self.k_diff = int(order[1])
        self.k_ma = self._spec.max_ma_order
        self.k_ma_params = self._spec.k_ma_params

        # The seasonal orders are expressed in periods of the original series,
        # i.e. the maximum seasonal lag times the number of periods per season
        self.k_seasonal_ar = (self._spec.max_seasonal_ar_order *
                              self._spec.seasonal_periods)
        self.k_seasonal_ar_params = self._spec.k_seasonal_ar_params
        self.k_seasonal_diff = int(seasonal_order[1])
        self.k_seasonal_ma = (self._spec.max_seasonal_ma_order *
                              self._spec.seasonal_periods)
        self.k_seasonal_ma_params = self._spec.k_seasonal_ma_params

        # Make internal copies of the differencing orders because if we use
        # simple differencing, then we will need to internally use zeros after
        # the simple differencing has been performed
        self._k_diff = self.k_diff
        self._k_seasonal_diff = self.k_seasonal_diff

        # We can only use the Hamilton representation if differencing is not
        # performed as a part of the state space
        if (self.hamilton_representation and not (self.simple_differencing or
           self._k_diff == self._k_seasonal_diff == 0)):
            raise ValueError('The Hamilton representation is only available'
                             ' for models in which there is no differencing'
                             ' integrated into the state vector. Set'
                             ' `simple_differencing` to True or set'
                             ' `hamilton_representation` to False')

        # Model order
        # (this is used internally in a number of locations)
        self._k_order = max(self.k_ar + self.k_seasonal_ar,
                            self.k_ma + self.k_seasonal_ma + 1)
        # An order of one with no AR terms means that there are no MA terms
        # either (the one comes from the `+ 1` above)
        if self._k_order == 1 and self.k_ar + self.k_seasonal_ar == 0:
            # Handle time-varying regression
            if self.time_varying_regression:
                self._k_order = 0

        # Exogenous data
        (self._k_exog, exog) = prepare_exog(exog)
        # (we internally use _k_exog for mechanics so that the public attribute
        # can be overridden by subclasses)
        self.k_exog = self._k_exog

        # Redefine mle_regression to be true only if it was previously set to
        # true and there are exogenous regressors
        self.mle_regression = (
            self.mle_regression and exog is not None and self._k_exog > 0
        )
        # State regression is regression with coefficients estimated within
        # the state vector
        self.state_regression = (
            not self.mle_regression and exog is not None and self._k_exog > 0
        )
        # If all we have is a regression (so k_ar = k_ma = 0), then put the
        # error term as measurement error
        if self.state_regression and self._k_order == 0:
            self.measurement_error = True

        # Number of states
        # (ARMA block, plus the differencing states unless the data is
        # differenced beforehand, plus one state per regressor when the
        # regression coefficients are part of the state vector)
        k_states = self._k_order
        if not self.simple_differencing:
            k_states += (self.seasonal_periods * self._k_seasonal_diff +
                         self._k_diff)
        if self.state_regression:
            k_states += self._k_exog

        # Number of positive definite elements of the state covariance matrix
        k_posdef = int(self._k_order > 0)
        # Only have an error component to the states if k_posdef > 0
        self.state_error = k_posdef > 0
        # Time-varying regression coefficients each add one more element
        if self.state_regression and self.time_varying_regression:
            k_posdef += self._k_exog

        # Diffuse initialization can be more sensitive to the variance value
        # in the case of state regression, so set a higher than usual default
        # variance
        if self.state_regression:
            kwargs.setdefault('initial_variance', 1e10)

        # Handle non-default loglikelihood burn
        # (None records that the caller did not supply a value)
        self._loglikelihood_burn = kwargs.get('loglikelihood_burn', None)

        # Number of parameters
        # (`measurement_error` is added directly, so True counts as one; one
        # more parameter is counted unless the scale is concentrated out)
        self.k_params = (
            self.k_ar_params + self.k_ma_params +
            self.k_seasonal_ar_params + self.k_seasonal_ma_params +
            self._k_trend +
            self.measurement_error +
            int(not self.concentrate_scale)
        )
        if self.mle_regression:
            self.k_params += self._k_exog

        # We need to have an array or pandas at this point
        self.orig_endog = endog
        self.orig_exog = exog
        if not _is_using_pandas(endog, None):
            endog = np.asanyarray(endog)

        # Update the differencing dimensions if simple differencing is applied
        # (the original orders are kept in the `orig_*` attributes)
        self.orig_k_diff = self._k_diff
        self.orig_k_seasonal_diff = self._k_seasonal_diff
        if (self.simple_differencing and
           (self._k_diff > 0 or self._k_seasonal_diff > 0)):
            self._k_diff = 0
            self._k_seasonal_diff = 0

        # Internally used in several locations
        self._k_states_diff = (
            self._k_diff + self.seasonal_periods * self._k_seasonal_diff
        )

        # Set some model variables now so they will be available for the
        # initialize() method, below
        self.nobs = len(endog)
        self.k_states = k_states
        self.k_posdef = k_posdef

        # Initialize the statespace
        super().__init__(
            endog, exog=exog, k_states=k_states, k_posdef=k_posdef, **kwargs
        )

        # Set the filter to concentrate out the scale if requested
        if self.concentrate_scale:
            self.ssm.filter_concentrated = True

        # Set as time-varying model if we have time-trend or exog
        if self._k_exog > 0 or len(self.polynomial_trend) > 1:
            self.ssm._time_invariant = False

        # Initialize the fixed components of the statespace model
        self.ssm['design'] = self.initial_design
        self.ssm['state_intercept'] = self.initial_state_intercept
        self.ssm['transition'] = self.initial_transition
        self.ssm['selection'] = self.initial_selection
        # With a concentrated scale, the first state covariance element is
        # fixed at one
        if self.concentrate_scale:
            self.ssm['state_cov', 0, 0] = 1.

        # update _init_keys attached by super
        self._init_keys += ['order', 'seasonal_order', 'trend',
                            'measurement_error', 'time_varying_regression',
                            'mle_regression', 'simple_differencing',
                            'enforce_stationarity', 'enforce_invertibility',
                            'hamilton_representation', 'concentrate_scale',
                            'trend_offset'] + list(kwargs.keys())
        # TODO: I think the kwargs are not attached, need to recover from ???

        # Initialize the state
        # (only when no initialization has been set on the state space model)
        if self.ssm.initialization is None:
            self.initialize_default()

    def prepare_data(self):
        """
        Prepare the datasets, applying simple differencing when requested.

        Returns
        -------
        endog, exog
            The datasets produced by the parent class, differenced when
            `simple_differencing` is set and the model has a non-zero
            differencing order.

        Notes
        -----
        When differencing is applied here, the datasets held by the model
        data object, the cached row labels and the index are shortened to
        match. The number of observations and the cached trend data are
        refreshed in every case.
        """
        endog, exog = super().prepare_data()

        apply_differencing = self.simple_differencing and (
            self.orig_k_diff > 0 or self.orig_k_seasonal_diff > 0)

        if apply_differencing:
            # Length before any rows are lost to differencing
            n_before = endog.shape[0]

            # Difference copies of the data
            diff_orders = (self.orig_k_diff, self.orig_k_seasonal_diff,
                           self.seasonal_periods)
            endog = diff(endog.copy(), *diff_orders)
            if exog is not None:
                exog = diff(exog.copy(), *diff_orders)

            # Replace the datasets held by the ModelData instance
            converted = self.data._convert_endog_exog(endog, exog)
            self.data.endog, self.data.exog = converted

            # Shorten the labels / index by the number of rows that were lost
            n_after = self.data.endog.shape[0]
            n_lost = n_before - n_after
            if self.data.row_labels is not None:
                self.data._cache['row_labels'] = self.data.row_labels[n_lost:]
            if self._index is not None:
                if self._index_int64:
                    # Integer indexes are replaced by a range starting at one
                    self._index = pd.RangeIndex(start=1, stop=n_after + 1)
                elif self._index_generated:
                    # Generated indexes are trimmed at the end
                    self._index = self._index[:-n_lost]
                else:
                    # Other indexes lose their leading entries
                    self._index = self._index[n_lost:]

        # The number of observations may have changed
        self.nobs = endog.shape[0]

        # Arrays used to compute the intercept from the trend components
        self._trend_data = prepare_trend_data(
            self.polynomial_trend, self._k_trend, self.nobs, self.trend_offset)

        return endog, exog

    def initialize(self):
        """
        Initialize the SARIMAX model.

        Caches the positions of the included lags of each lag polynomial, the
        index expressions locating the AR and MA parameters in the state
        space matrices and, for time-varying state regression, the positions
        of the regression variances in the state covariance matrix.

        Notes
        -----
        These initialization steps must occur following the parent class
        __init__ function calls.
        """
        super().initialize()

        def included_lags(polynomial):
            # Positions of the non-zero coefficients, leaving out the first
            # one (the constant term)
            return np.nonzero(polynomial)[0][1:]

        self._polynomial_ar_idx = included_lags(self.polynomial_ar)
        self._polynomial_ma_idx = included_lags(self.polynomial_ma)
        self._polynomial_seasonal_ar_idx = included_lags(
            self.polynomial_seasonal_ar)
        self._polynomial_seasonal_ma_idx = included_lags(
            self.polynomial_seasonal_ma)

        # The index expressions below are computed once here so that they do
        # not have to be rebuilt on every update()
        offset = self._k_states_diff
        use_hamilton = self.hamilton_representation

        # Reduced form AR parameters: a column of the transition matrix, or
        # a row of it in the Hamilton representation
        ar_span = slice(offset, offset + self.k_ar + self.k_seasonal_ar)
        if use_hamilton:
            self.transition_ar_params_idx = ('transition', offset, ar_span)
        else:
            self.transition_ar_params_idx = ('transition', ar_span, offset)

        # Reduced form MA parameters: a column of the selection matrix, or a
        # row of the design matrix in the Hamilton representation
        ma_start = offset + 1
        ma_span = slice(ma_start, ma_start + self.k_ma + self.k_seasonal_ma)
        if use_hamilton:
            self.design_ma_params_idx = ('design', 0, ma_span)
        else:
            self.selection_ma_params_idx = ('selection', ma_span, 0)

        # Diagonal positions of the exog variances, which take up the last
        # `_k_exog` elements of the state covariance matrix diagonal
        if self.state_regression and self.time_varying_regression:
            diag_rows, diag_cols = np.diag_indices(self.k_posdef)
            self._exog_variance_idx = ('state_cov',
                                       diag_rows[-self._k_exog:],
                                       diag_cols[-self._k_exog:])

    def initialize_default(self, approximate_diffuse_variance=None):
        """
        Set the default initialization of the state vector.

        Parameters
        ----------
        approximate_diffuse_variance : float, optional
            Variance passed to the `Initialization` object. Defaults to the
            `initial_variance` of the state space model.
        """
        if approximate_diffuse_variance is None:
            approximate_diffuse_variance = self.ssm.initial_variance

        diffuse_type = (
            'diffuse' if self.use_exact_diffuse else 'approximate_diffuse')

        # With approximate diffuse initialization, set the loglikelihood burn
        # parameter unless one was given in the constructor
        if not self.use_exact_diffuse and self._loglikelihood_burn is None:
            n_burn = self.k_states
            if self.enforce_stationarity:
                n_burn -= self._k_order
            self.loglikelihood_burn = n_burn

        init = Initialization(
            self.k_states,
            approximate_diffuse_variance=approximate_diffuse_variance)

        if not self.enforce_stationarity:
            # Without enforced stationarity a stationary component cannot be
            # initialized, so every state uses the diffuse type
            init.set(None, diffuse_type)
        else:
            diff_end = self._k_states_diff
            arma_end = diff_end + self._k_order
            exog_end = arma_end + self._k_exog

            # Differencing operators come first
            init.set((0, diff_end), diffuse_type)
            # followed by the stationary component
            init.set((diff_end, arma_end), 'stationary')
            # and the regression components come last
            init.set((arma_end, exog_end), diffuse_type)

        self.ssm.initialization = init

    @property
    def initial_design(self):
        """Initial design matrix"""
        # Basic design matrix
        design = np.r_[
            [1] * self._k_diff,
            ([0] * (self.seasonal_periods - 1) + [1]) * self._k_seasonal_diff,
            [1] * self.state_error, [0] * (self._k_order - 1)
        ]

        if len(design) == 0:
            design = np.r_[0]

        # If we have exogenous regressors included as part of the state vector
        # then the exogenous data is incorporated as a time-varying component
        # of the design matrix
        if self.state_regression:
            if self._k_order > 0:
                design = np.c_[
                    np.reshape(
                        np.repeat(design, self.nobs),
                        (design.shape[0], self.nobs)
                    ).T,
                    self.exog
                ].T[None, :, :]
            else:
                design = self.exog.T[None, :, :]
        return design

    @property
    def initial_state_intercept(self):
        """Initial state intercept vector"""
        # TODO make this self._k_trend > 1 and adjust the update to take
        # into account that if the trend is a constant, it is not time-varying
        if self._k_trend > 0:
            state_intercept = np.zeros((self.k_states, self.nobs))
        else:
            state_intercept = np.zeros((self.k_states,))
        return state_intercept

    @property
    def initial_transition(self):
        """Initial transition matrix"""
        transition = np.zeros((self.k_states, self.k_states))

        # Exogenous regressors component
        if self.state_regression:
            start = -self._k_exog
            # T_\beta
            transition[start:, start:] = np.eye(self._k_exog)

            # Autoregressive component
            start = -(self._k_exog + self._k_order)
            end = -self._k_exog if self._k_exog > 0 else None
        else:
            # Autoregressive component
            start = -self._k_order
            end = None

        # T_c
        if self._k_order > 0:
            transition[start:end, start:end] = companion_matrix(self._k_order)
            if self.hamilton_representation:
                transition[start:end, start:end] = np.transpose(
                    companion_matrix(self._k_order)
                )

        # Seasonal differencing component
        # T^*
        if self._k_seasonal_diff > 0:
            seasonal_companion = companion_matrix(self.seasonal_periods).T
            seasonal_companion[0, -1] = 1
            for d in range(self._k_seasonal_diff):
                start = self._k_diff + d * self.seasonal_periods
                end = self._k_diff + (d + 1) * self.seasonal_periods

                # T_c^*
                transition[start:end, start:end] = seasonal_companion

                # i
                if d < self._k_seasonal_diff - 1:
                    transition[start, end + self.seasonal_periods - 1] = 1

                # \iota
                transition[start, self._k_states_diff] = 1

        # Differencing component
        if self._k_diff > 0:
            idx = np.triu_indices(self._k_diff)
            # T^**
            transition[idx] = 1
            # [0 1]
            if self.seasonal_periods > 0:
                start = self._k_diff
                end = self._k_states_diff
                transition[:self._k_diff, start:end] = (
                    ([0] * (self.seasonal_periods - 1) + [1]) *
                    self._k_seasonal_diff)
            # [1 0]
            column = self._k_states_diff
            transition[:self._k_diff, column] = 1

        return transition

    @property
    def initial_selection(self):
        """Initial selection matrix"""
        if not (self.state_regression and self.time_varying_regression):
            if self.k_posdef > 0:
                selection = np.r_[
                    [0] * (self._k_states_diff),
                    [1] * (self._k_order > 0), [0] * (self._k_order - 1),
                    [0] * ((1 - self.mle_regression) * self._k_exog)
                ][:, None]

                if len(selection) == 0:
                    selection = np.zeros((self.k_states, self.k_posdef))
            else:
                selection = np.zeros((self.k_states, 0))
        else:
            selection = np.zeros((self.k_states, self.k_posdef))
            # Typical state variance
            if self._k_order > 0:
                selection[0, 0] = 1
            # Time-varying regression coefficient variances
            for i in range(self._k_exog, 0, -1):
                selection[-i, -i] = 1
        return selection

    def clone(self, endog, exog=None, **kwargs):
        return self._clone_from_init_kwds(endog, exog=exog, **kwargs)

    @property
    def _res_classes(self):
        """Mapping from result type to (results class, wrapper class)."""
        fit_classes = (SARIMAXResults, SARIMAXResultsWrapper)
        return {'fit': fit_classes}

    @staticmethod
    def _conditional_sum_squares(endog, k_ar, polynomial_ar, k_ma,
                                 polynomial_ma, k_trend=0, trend_data=None,
                                 warning_description=None):
        k = 2 * k_ma
        r = max(k + k_ma, k_ar)

        k_params_ar = 0 if k_ar == 0 else len(polynomial_ar.nonzero()[0]) - 1
        k_params_ma = 0 if k_ma == 0 else len(polynomial_ma.nonzero()[0]) - 1

        residuals = None
        if k_ar + k_ma + k_trend > 0:
            try:
                # If we have MA terms, get residuals from an AR(k) model to use
                # as data for conditional sum of squares estimates of the MA
                # parameters
                if k_ma > 0:
                    Y = endog[k:]
                    X = lagmat(endog, k, trim='both')
                    params_ar = np.linalg.pinv(X).dot(Y)
                    residuals = Y - np.dot(X, params_ar)

                # Run an ARMA(p,q) model using the just computed residuals as
                # data
                Y = endog[r:]

                X = np.empty((Y.shape[0], 0))
                if k_trend > 0:
                    if trend_data is None:
                        raise ValueError('Trend data must be provided if'
                                         ' `k_trend` > 0.')
                    X = np.c_[X, trend_data[:(-r if r > 0 else None), :]]
                if k_ar > 0:
                    cols = polynomial_ar.nonzero()[0][1:] - 1
                    X = np.c_[X, lagmat(endog, k_ar)[r:, cols]]
                if k_ma > 0:
                    cols = polynomial_ma.nonzero()[0][1:] - 1
                    X = np.c_[X, lagmat(residuals, k_ma)[r-k:, cols]]

                # Get the array of [ar_params, ma_params]
                params = np.linalg.pinv(X).dot(Y)
                residuals = Y - np.dot(X, params)
            except ValueError:
                if warning_description is not None:
                    warning_description = ' for %s' % warning_description
                else:
                    warning_description = ''
                warn('Too few observations to estimate starting parameters%s.'
                     ' All parameters except for variances will be set to'
                     ' zeros.' % warning_description)
                # Typically this will be raised if there are not enough
                # observations for the `lagmat` calls.
                params = np.zeros(k_trend + k_ar + k_ma, dtype=endog.dtype)
                if len(endog) == 0:
                    # This case usually happens when there are not even enough
                    # observations for a complete set of differencing
                    # operations (no hope of fitting, just set starting
                    # variance to 1)
                    residuals = np.ones(k_params_ma * 2 + 1, dtype=endog.dtype)
                else:
                    residuals = np.r_[
                        np.zeros(k_params_ma * 2, dtype=endog.dtype),
                        endog - np.mean(endog)]

        # Default output
        params_trend = []
        params_ar = []
        params_ma = []
        params_variance = []

        # Get the params
        offset = 0
        if k_trend > 0:
            params_trend = params[offset:k_trend + offset]
            offset += k_trend
        if k_ar > 0:
            params_ar = params[offset:k_params_ar + offset]
            offset += k_params_ar
        if k_ma > 0:
            params_ma = params[offset:k_params_ma + offset]
            offset += k_params_ma
        if residuals is not None:
            if len(residuals) > max(1, k_params_ma):
                params_variance = (residuals[k_params_ma:] ** 2).mean()
            else:
                params_variance = np.var(endog)

        return (params_trend, params_ar, params_ma,
                params_variance)

    @property
    def start_params(self):
        """
        Starting parameters for maximum likelihood estimation
        """

        # Perform differencing if necessary (i.e. if simple differencing is
        # false so that the state-space model will use the entire dataset)
        trend_data = self._trend_data
        if not self.simple_differencing and (
           self._k_diff > 0 or self._k_seasonal_diff > 0):
            endog = diff(self.endog, self._k_diff,
                         self._k_seasonal_diff, self.seasonal_periods)
            if self.exog is not None:
                exog = diff(self.exog, self._k_diff,
                            self._k_seasonal_diff, self.seasonal_periods)
            else:
                exog = None
            trend_data = trend_data[:endog.shape[0], :]
        else:
            endog = self.endog.copy()
            exog = self.exog.copy() if self.exog is not None else None
        endog = endog.squeeze()

        # Although the Kalman filter can deal with missing values in endog,
        # conditional sum of squares cannot
        if np.any(np.isnan(endog)):
            mask = ~np.isnan(endog).squeeze()
            endog = endog[mask]
            if exog is not None:
                exog = exog[mask]
            if trend_data is not None:
                trend_data = trend_data[mask]

        # Regression effects via OLS
        params_exog = []
        if self._k_exog > 0:
            params_exog = np.linalg.pinv(exog).dot(endog)
            endog = endog - np.dot(exog, params_exog)
        if self.state_regression:
            params_exog = []

        # Non-seasonal ARMA component and trend
        (params_trend, params_ar, params_ma,
         params_variance) = self._conditional_sum_squares(
            endog, self.k_ar, self.polynomial_ar, self.k_ma,
            self.polynomial_ma, self._k_trend, trend_data,
            warning_description='ARMA and trend')

        # If we have estimated non-stationary start parameters but enforce
        # stationarity is on, start with 0 parameters and warn
        invalid_ar = (
            self.k_ar > 0 and
            self.enforce_stationarity and
            not is_invertible(np.r_[1, -params_ar])
        )
        if invalid_ar:
            warn('Non-stationary starting autoregressive parameters'
                 ' found. Using zeros as starting parameters.')
            params_ar *= 0

        # If we have estimated non-invertible start parameters but enforce
        # invertibility is on, raise an error
        invalid_ma = (
            self.k_ma > 0 and
            self.enforce_invertibility and
            not is_invertible(np.r_[1, params_ma])
        )
        if invalid_ma:
            warn('Non-invertible starting MA parameters found.'
                 ' Using zeros as starting parameters.', UserWarning)
            params_ma *= 0

        # Seasonal Parameters
        _, params_seasonal_ar, params_seasonal_ma, params_seasonal_variance = (
            self._conditional_sum_squares(
                endog, self.k_seasonal_ar, self.polynomial_seasonal_ar,
                self.k_seasonal_ma, self.polynomial_seasonal_ma,
                warning_description='seasonal ARMA'))

        # If we have estimated non-stationary start parameters but enforce
        # stationarity is on, warn and set start params to 0
        invalid_seasonal_ar = (
            self.k_seasonal_ar > 0 and
            self.enforce_stationarity and
            not is_invertible(np.r_[1, -params_seasonal_ar])
        )
        if invalid_seasonal_ar:
            warn('Non-stationary starting seasonal autoregressive'
                 ' Using zeros as starting parameters.')
            params_seasonal_ar *= 0

        # If we have estimated non-invertible start parameters but enforce
        # invertibility is on, raise an error
        invalid_seasonal_ma = (
            self.k_seasonal_ma > 0 and
            self.enforce_invertibility and
            not is_invertible(np.r_[1, params_seasonal_ma])
        )
        if invalid_seasonal_ma:
            warn('Non-invertible starting seasonal moving average'
                 ' Using zeros as starting parameters.')
            params_seasonal_ma *= 0

        # Variances
        params_exog_variance = []
        if self.state_regression and self.time_varying_regression:
            # TODO how to set the initial variance parameters?
            params_exog_variance = [1] * self._k_exog
        if (self.state_error and type(params_variance) is list and
                len(params_variance) == 0):
            if not (type(params_seasonal_variance) is list and
                    len(params_seasonal_variance) == 0):
                params_variance = params_seasonal_variance
            elif self._k_exog > 0:
                params_variance = np.inner(endog, endog)
            else:
                params_variance = np.inner(endog, endog) / self.nobs
        params_measurement_variance = 1 if self.measurement_error else []

        # We want to bound the starting variance away from zero
        params_variance = np.atleast_1d(np.array(params_variance))
        if params_variance.size:
            # Avoid comparisons with empty arrays due to changes in NumPy 2.2
            params_variance = np.atleast_1d(max(params_variance[0], 1e-10))

        # Remove state variance as parameter if scale is concentrated out
        if self.concentrate_scale:
            params_variance = []

        # Combine all parameters
        return np.r_[
            params_trend,
            params_exog,
            params_ar,
            params_ma,
            params_seasonal_ar,
            params_seasonal_ma,
            params_exog_variance,
            params_measurement_variance,
            params_variance
        ]

    @property
    def endog_names(self, latex=False):
        """Names of endogenous variables"""
        diff = ''
        if self.k_diff > 0:
            if self.k_diff == 1:
                diff = r'\Delta' if latex else 'D'
            else:
                diff = (r'\Delta^%d' if latex else 'D%d') % self.k_diff

        seasonal_diff = ''
        if self.k_seasonal_diff > 0:
            if self.k_seasonal_diff == 1:
                seasonal_diff = ((r'\Delta_%d' if latex else 'DS%d') %
                                 (self.seasonal_periods))
            else:
                seasonal_diff = ((r'\Delta_%d^%d' if latex else 'D%dS%d') %
                                 (self.k_seasonal_diff, self.seasonal_periods))
        endog_diff = self.simple_differencing
        if endog_diff and self.k_diff > 0 and self.k_seasonal_diff > 0:
            return (('%s%s %s' if latex else '%s.%s.%s') %
                    (diff, seasonal_diff, self.data.ynames))
        elif endog_diff and self.k_diff > 0:
            return (('%s %s' if latex else '%s.%s') %
                    (diff, self.data.ynames))
        elif endog_diff and self.k_seasonal_diff > 0:
            return (('%s %s' if latex else '%s.%s') %
                    (seasonal_diff, self.data.ynames))
        else:
            return self.data.ynames

    # Canonical ordering of the parameter groups. `param_terms` filters this
    # list down to the groups that are actually present in the model.
    params_complete = [
        'trend', 'exog', 'ar', 'ma', 'seasonal_ar', 'seasonal_ma',
        'exog_variance', 'measurement_variance', 'variance'
    ]

    @property
    def param_terms(self):
        """
        List of parameters actually included in the model, in sorted order.

        TODO Make this an dict with slice or indices as the values.
        """
        model_orders = self.model_orders
        # Get basic list from model orders
        params = [
            order for order in self.params_complete
            if model_orders[order] > 0
        ]
        # k_exog may be positive without associated parameters if it is in the
        # state vector
        if 'exog' in params and not self.mle_regression:
            params.remove('exog')

        return params

    @property
    def param_names(self):
        """
        List of human readable parameter names (for parameters actually
        included in the model).
        """
        params_sort_order = self.param_terms
        model_names = self.model_names
        return [
            name for param in params_sort_order for name in model_names[param]
        ]

    @property
    def state_names(self):
        # TODO: we may be able to revisit these states to get somewhat more
        # informative names, but ultimately probably not much better.
        # TODO: alternatively, we may be able to get better for certain models,
        # like pure AR models.
        k_ar_states = self._k_order
        if not self.simple_differencing:
            k_ar_states += (self.seasonal_periods * self._k_seasonal_diff +
                            self._k_diff)
        names = ['state.%d' % i for i in range(k_ar_states)]

        if self._k_exog > 0 and self.state_regression:
            names += ['beta.%s' % self.exog_names[i]
                      for i in range(self._k_exog)]

        return names

    @property
    def model_orders(self):
        """
        The orders of each of the polynomials in the model.
        """
        return {
            'trend': self._k_trend,
            'exog': self._k_exog,
            'ar': self.k_ar,
            'ma': self.k_ma,
            'seasonal_ar': self.k_seasonal_ar,
            'seasonal_ma': self.k_seasonal_ma,
            'reduced_ar': self.k_ar + self.k_seasonal_ar,
            'reduced_ma': self.k_ma + self.k_seasonal_ma,
            'exog_variance': self._k_exog if (
                self.state_regression and self.time_varying_regression) else 0,
            'measurement_variance': int(self.measurement_error),
            'variance': int(self.state_error and not self.concentrate_scale),
        }

    @property
    def model_names(self):
        """
        The plain text names of all possible model parameters.
        """
        return self._get_model_names(latex=False)

    @property
    def model_latex_names(self):
        """
        The latex names of all possible model parameters.
        """
        return self._get_model_names(latex=True)

    def _get_model_names(self, latex=False):
        names = {
            'trend': None,
            'exog': None,
            'ar': None,
            'ma': None,
            'seasonal_ar': None,
            'seasonal_ma': None,
            'reduced_ar': None,
            'reduced_ma': None,
            'exog_variance': None,
            'measurement_variance': None,
            'variance': None,
        }

        # Trend
        if self._k_trend > 0:
            trend_template = 't_%d' if latex else 'trend.%d'
            names['trend'] = []
            for i in self.polynomial_trend.nonzero()[0]:
                if i == 0:
                    names['trend'].append('intercept')
                elif i == 1:
                    names['trend'].append('drift')
                else:
                    names['trend'].append(trend_template % i)

        # Exogenous coefficients
        if self._k_exog > 0:
            names['exog'] = self.exog_names

        # Autoregressive
        if self.k_ar > 0:
            ar_template = '$\\phi_%d$' if latex else 'ar.L%d'
            names['ar'] = []
            for i in self.polynomial_ar.nonzero()[0][1:]:
                names['ar'].append(ar_template % i)

        # Moving Average
        if self.k_ma > 0:
            ma_template = '$\\theta_%d$' if latex else 'ma.L%d'
            names['ma'] = []
            for i in self.polynomial_ma.nonzero()[0][1:]:
                names['ma'].append(ma_template % i)

        # Seasonal Autoregressive
        if self.k_seasonal_ar > 0:
            seasonal_ar_template = (
                '$\\tilde \\phi_%d$' if latex else 'ar.S.L%d'
            )
            names['seasonal_ar'] = []
            for i in self.polynomial_seasonal_ar.nonzero()[0][1:]:
                names['seasonal_ar'].append(seasonal_ar_template % i)

        # Seasonal Moving Average
        if self.k_seasonal_ma > 0:
            seasonal_ma_template = (
                '$\\tilde \\theta_%d$' if latex else 'ma.S.L%d'
            )
            names['seasonal_ma'] = []
            for i in self.polynomial_seasonal_ma.nonzero()[0][1:]:
                names['seasonal_ma'].append(seasonal_ma_template % i)

        # Reduced Form Autoregressive
        if self.k_ar > 0 or self.k_seasonal_ar > 0:
            reduced_polynomial_ar = reduced_polynomial_ar = -np.polymul(
                self.polynomial_ar, self.polynomial_seasonal_ar
            )
            ar_template = '$\\Phi_%d$' if latex else 'ar.R.L%d'
            names['reduced_ar'] = []
            for i in reduced_polynomial_ar.nonzero()[0][1:]:
                names['reduced_ar'].append(ar_template % i)

        # Reduced Form Moving Average
        if self.k_ma > 0 or self.k_seasonal_ma > 0:
            reduced_polynomial_ma = np.polymul(
                self.polynomial_ma, self.polynomial_seasonal_ma
            )
            ma_template = '$\\Theta_%d$' if latex else 'ma.R.L%d'
            names['reduced_ma'] = []
            for i in reduced_polynomial_ma.nonzero()[0][1:]:
                names['reduced_ma'].append(ma_template % i)

        # Exogenous variances
        if self.state_regression and self.time_varying_regression:
            if not self.concentrate_scale:
                exog_var_template = ('$\\sigma_\\text{%s}^2$' if latex
                                     else 'var.%s')
            else:
                exog_var_template = (
                    '$\\sigma_\\text{%s}^2 / \\sigma_\\zeta^2$' if latex
                    else 'snr.%s')
            names['exog_variance'] = [
                exog_var_template % exog_name for exog_name in self.exog_names
            ]

        # Measurement error variance
        if self.measurement_error:
            if not self.concentrate_scale:
                meas_var_tpl = (
                    '$\\sigma_\\eta^2$' if latex else 'var.measurement_error')
            else:
                meas_var_tpl = (
                    '$\\sigma_\\eta^2 / \\sigma_\\zeta^2$' if latex
                    else 'snr.measurement_error')
            names['measurement_variance'] = [meas_var_tpl]

        # State variance
        if self.state_error and not self.concentrate_scale:
            var_tpl = '$\\sigma_\\zeta^2$' if latex else 'sigma2'
            names['variance'] = [var_tpl]

        return names

    def transform_params(self, unconstrained):
        """
        Transform unconstrained parameters used by the optimizer to constrained
        parameters used in likelihood evaluation.

        Used primarily to enforce stationarity of the autoregressive lag
        polynomial, invertibility of the moving average lag polynomial, and
        positive variance parameters.

        Parameters
        ----------
        unconstrained : array_like
            Unconstrained parameters used by the optimizer.

        Returns
        -------
        constrained : array_like
            Constrained parameters used in likelihood evaluation.

        Notes
        -----
        If the lag polynomial has non-consecutive powers (so that the
        coefficient is zero on some element of the polynomial), then the
        constraint function is not onto the entire space of invertible
        polynomials, although it only excludes a very small portion very close
        to the invertibility boundary.
        """
        unconstrained = np.array(unconstrained, ndmin=1)
        constrained = np.zeros(unconstrained.shape, unconstrained.dtype)

        start = end = 0

        # Retain the trend parameters
        if self._k_trend > 0:
            end += self._k_trend
            constrained[start:end] = unconstrained[start:end]
            start += self._k_trend

        # Retain any MLE regression coefficients
        if self.mle_regression:
            end += self._k_exog
            constrained[start:end] = unconstrained[start:end]
            start += self._k_exog

        # Transform the AR parameters (phi) to be stationary
        if self.k_ar_params > 0:
            end += self.k_ar_params
            if self.enforce_stationarity:
                constrained[start:end] = (
                    constrain_stationary_univariate(unconstrained[start:end])
                )
            else:
                constrained[start:end] = unconstrained[start:end]
            start += self.k_ar_params

        # Transform the MA parameters (theta) to be invertible
        if self.k_ma_params > 0:
            end += self.k_ma_params
            if self.enforce_invertibility:
                constrained[start:end] = (
                    -constrain_stationary_univariate(unconstrained[start:end])
                )
            else:
                constrained[start:end] = unconstrained[start:end]
            start += self.k_ma_params

        # Transform the seasonal AR parameters (\tilde phi) to be stationary
        if self.k_seasonal_ar > 0:
            end += self.k_seasonal_ar_params
            if self.enforce_stationarity:
                constrained[start:end] = (
                    constrain_stationary_univariate(unconstrained[start:end])
                )
            else:
                constrained[start:end] = unconstrained[start:end]
            start += self.k_seasonal_ar_params

        # Transform the seasonal MA parameters (\tilde theta) to be invertible
        if self.k_seasonal_ma_params > 0:
            end += self.k_seasonal_ma_params
            if self.enforce_invertibility:
                constrained[start:end] = (
                    -constrain_stationary_univariate(unconstrained[start:end])
                )
            else:
                constrained[start:end] = unconstrained[start:end]
            start += self.k_seasonal_ma_params

        # Transform the standard deviation parameters to be positive
        if self.state_regression and self.time_varying_regression:
            end += self._k_exog
            constrained[start:end] = unconstrained[start:end]**2
            start += self._k_exog
        if self.measurement_error:
            constrained[start] = unconstrained[start]**2
            start += 1
            end += 1
        if self.state_error and not self.concentrate_scale:
            constrained[start] = unconstrained[start]**2
            # start += 1
            # end += 1

        return constrained

    def untransform_params(self, constrained):
        """
        Transform constrained parameters used in likelihood evaluation
        to unconstrained parameters used by the optimizer

        Used primarily to reverse enforcement of stationarity of the
        autoregressive lag polynomial and invertibility of the moving average
        lag polynomial.

        Parameters
        ----------
        constrained : array_like
            Constrained parameters used in likelihood evaluation.

        Returns
        -------
        constrained : array_like
            Unconstrained parameters used by the optimizer.

        Notes
        -----
        If the lag polynomial has non-consecutive powers (so that the
        coefficient is zero on some element of the polynomial), then the
        constraint function is not onto the entire space of invertible
        polynomials, although it only excludes a very small portion very close
        to the invertibility boundary.
        """
        constrained = np.array(constrained, ndmin=1)
        unconstrained = np.zeros(constrained.shape, constrained.dtype)

        start = end = 0

        # Retain the trend parameters
        if self._k_trend > 0:
            end += self._k_trend
            unconstrained[start:end] = constrained[start:end]
            start += self._k_trend

        # Retain any MLE regression coefficients
        if self.mle_regression:
            end += self._k_exog
            unconstrained[start:end] = constrained[start:end]
            start += self._k_exog

        # Transform the AR parameters (phi) to be stationary
        if self.k_ar_params > 0:
            end += self.k_ar_params
            if self.enforce_stationarity:
                unconstrained[start:end] = (
                    unconstrain_stationary_univariate(constrained[start:end])
                )
            else:
                unconstrained[start:end] = constrained[start:end]
            start += self.k_ar_params

        # Transform the MA parameters (theta) to be invertible
        if self.k_ma_params > 0:
            end += self.k_ma_params
            if self.enforce_invertibility:
                unconstrained[start:end] = (
                    unconstrain_stationary_univariate(-constrained[start:end])
                )
            else:
                unconstrained[start:end] = constrained[start:end]
            start += self.k_ma_params

        # Transform the seasonal AR parameters (\tilde phi) to be stationary
        if self.k_seasonal_ar > 0:
            end += self.k_seasonal_ar_params
            if self.enforce_stationarity:
                unconstrained[start:end] = (
                    unconstrain_stationary_univariate(constrained[start:end])
                )
            else:
                unconstrained[start:end] = constrained[start:end]
            start += self.k_seasonal_ar_params

        # Transform the seasonal MA parameters (\tilde theta) to be invertible
        if self.k_seasonal_ma_params > 0:
            end += self.k_seasonal_ma_params
            if self.enforce_invertibility:
                unconstrained[start:end] = (
                    unconstrain_stationary_univariate(-constrained[start:end])
                )
            else:
                unconstrained[start:end] = constrained[start:end]
            start += self.k_seasonal_ma_params

        # Untransform the standard deviation
        if self.state_regression and self.time_varying_regression:
            end += self._k_exog
            unconstrained[start:end] = constrained[start:end]**0.5
            start += self._k_exog
        if self.measurement_error:
            unconstrained[start] = constrained[start]**0.5
            start += 1
            end += 1
        if self.state_error and not self.concentrate_scale:
            unconstrained[start] = constrained[start]**0.5
            # start += 1
            # end += 1

        return unconstrained

    def _validate_can_fix_params(self, param_names):
        """
        Check that the requested set of fixed parameters is allowed.

        After the parent class checks, this rejects fixing only some of the
        parameters of a lag polynomial while stationarity / invertibility is
        enforced for it.

        Parameters
        ----------
        param_names : set
            Names of the parameters to fix.

        Raises
        ------
        ValueError
            If a proper, non-empty subset of a constrained lag polynomial's
            parameters is fixed.
        """
        super()._validate_can_fix_params(param_names)
        model_names = self.model_names

        stationarity = (
            self.enforce_stationarity, '`enforce_stationarity=True`')
        invertibility = (
            self.enforce_invertibility, '`enforce_invertibility=True`')
        groups = [
            ('ar', 'autoregressive') + stationarity,
            ('seasonal_ar', 'seasonal autoregressive') + stationarity,
            ('ma', 'moving average') + invertibility,
            ('seasonal_ma', 'seasonal moving average') + invertibility,
        ]

        for key, title, enforced, enforced_desc in groups:
            group = set(model_names[key] or [])
            fixes_some = len(param_names.intersection(group)) > 0
            fixes_every = param_names.issuperset(group)
            if not enforced or not fixes_some or fixes_every:
                continue
            raise ValueError('Cannot fix individual %s parameters when'
                             ' %s. Must either fix all %s parameters or'
                             ' none.' % (title, enforced_desc, title))

    def update(self, params, transformed=True, includes_fixed=False,
               complex_step=False):
        """
        Update the parameters of the model

        Updates the representation matrices to fill in the new parameter
        values.

        Parameters
        ----------
        params : array_like
            Array of new parameters.
        transformed : bool, optional
            Whether or not `params` is already transformed. If set to False,
            `transform_params` is called. Default is True.
        includes_fixed : bool, optional
            Forwarded unchanged to `handle_params`. Default is False.
        complex_step : bool, optional
            Accepted as part of the method signature; it is not referenced
            in the body of this method. Default is False.

        Returns
        -------
        params : array_like
            Array of parameters, as returned by `handle_params`.
        """
        params = self.handle_params(params, transformed=transformed,
                                    includes_fixed=includes_fixed)

        def _fill_polynomial(polynomial, idx, values):
            # Write `values` into `polynomial` at positions `idx` and return
            # the array that now holds them. If the stored polynomial has the
            # same dtype as `params` it is modified in place; otherwise a
            # copy of its real part, cast to the dtype of `params`, is filled
            # and returned instead.
            if polynomial.dtype != params.dtype:
                polynomial = polynomial.real.astype(params.dtype)
            polynomial[idx] = values
            return polynomial

        # Parameter groups that only exist for some model configurations
        params_exog = None
        params_exog_variance = None
        params_measurement_variance = None
        params_variance = None

        # Extract the parameters. The groups are stored contiguously in
        # `params`, so a single running offset is enough to slice them out.
        offset = 0
        params_trend = params[offset:offset + self._k_trend]
        offset += self._k_trend
        if self.mle_regression:
            params_exog = params[offset:offset + self._k_exog]
            offset += self._k_exog
        params_ar = params[offset:offset + self.k_ar_params]
        offset += self.k_ar_params
        params_ma = params[offset:offset + self.k_ma_params]
        offset += self.k_ma_params
        params_seasonal_ar = (
            params[offset:offset + self.k_seasonal_ar_params])
        offset += self.k_seasonal_ar_params
        params_seasonal_ma = (
            params[offset:offset + self.k_seasonal_ma_params])
        offset += self.k_seasonal_ma_params
        if self.state_regression and self.time_varying_regression:
            params_exog_variance = params[offset:offset + self._k_exog]
            offset += self._k_exog
        if self.measurement_error:
            params_measurement_variance = params[offset]
            offset += 1
        if self.state_error and not self.concentrate_scale:
            params_variance = params[offset]

        # Update lag polynomials. AR coefficients are stored negated, MA
        # coefficients are stored as given.
        if self.k_ar > 0:
            self._polynomial_ar = _fill_polynomial(
                self._polynomial_ar, self._polynomial_ar_idx, -params_ar)

        if self.k_ma > 0:
            self._polynomial_ma = _fill_polynomial(
                self._polynomial_ma, self._polynomial_ma_idx, params_ma)

        if self.k_seasonal_ar > 0:
            self._polynomial_seasonal_ar = _fill_polynomial(
                self._polynomial_seasonal_ar,
                self._polynomial_seasonal_ar_idx, -params_seasonal_ar)

        if self.k_seasonal_ma > 0:
            self._polynomial_seasonal_ma = _fill_polynomial(
                self._polynomial_seasonal_ma,
                self._polynomial_seasonal_ma_idx, params_seasonal_ma)

        # Get the reduced form lag polynomial terms by multiplying the regular
        # and seasonal lag polynomials.
        # Note: the numpy `np.polymul` examples order coefficients from
        # highest degree to lowest, whereas ours are ordered from lowest to
        # highest; this does not matter for the product.
        if self.k_seasonal_ar > 0:
            reduced_polynomial_ar = -np.polymul(
                self._polynomial_ar, self._polynomial_seasonal_ar
            )
        else:
            reduced_polynomial_ar = -self._polynomial_ar
        if self.k_seasonal_ma > 0:
            reduced_polynomial_ma = np.polymul(
                self._polynomial_ma, self._polynomial_seasonal_ma
            )
        else:
            reduced_polynomial_ma = self._polynomial_ma

        # Observation intercept
        # Exogenous data with MLE estimation of parameters enters through a
        # time-varying observation intercept (is equivalent to simply
        # subtracting it out of the endogenous variable first)
        if self.mle_regression:
            self.ssm['obs_intercept'] = np.dot(self.exog, params_exog)[None, :]

        # State intercept (Harvey) or additional observation intercept
        # (Hamilton)
        # SARIMA trend enters through a time-varying state intercept,
        # associated with the first row of the stationary component of the
        # state vector (i.e. the first element of the state vector following
        # any differencing elements)
        if self._k_trend > 0:
            data = np.dot(self._trend_data, params_trend).astype(params.dtype)
            if not self.hamilton_representation:
                self.ssm['state_intercept', self._k_states_diff, :] = data
            else:
                # The way the trend enters in the Hamilton representation means
                # that the parameter is not an ``intercept'' but instead the
                # mean of the process. The trend values in `data` are meant for
                # an intercept, and so must be transformed to represent the
                # mean instead
                data /= np.sum(-reduced_polynomial_ar)

                # If we already set the observation intercept for MLE
                # regression, just add to it
                if self.mle_regression:
                    self.ssm.obs_intercept += data[None, :]
                # Otherwise set it directly
                else:
                    self.ssm['obs_intercept'] = data[None, :]

        # Observation covariance matrix
        if self.measurement_error:
            self.ssm['obs_cov', 0, 0] = params_measurement_variance

        # Transition matrix
        if self.k_ar > 0 or self.k_seasonal_ar > 0:
            self.ssm[self.transition_ar_params_idx] = reduced_polynomial_ar[1:]
        elif not self.ssm.transition.dtype == params.dtype:
            # This is required if the transition matrix is not really in use
            # (e.g. for an MA(q) process) so that its dtype still follows the
            # dtype of the parameters. This changes the dtype manually.
            self.ssm['transition'] = self.ssm['transition'].real.astype(
                params.dtype)

        # Selection matrix (Harvey) or Design matrix (Hamilton)
        if self.k_ma > 0 or self.k_seasonal_ma > 0:
            if not self.hamilton_representation:
                self.ssm[self.selection_ma_params_idx] = (
                    reduced_polynomial_ma[1:]
                )
            else:
                self.ssm[self.design_ma_params_idx] = reduced_polynomial_ma[1:]

        # State covariance matrix
        if self.k_posdef > 0:
            if not self.concentrate_scale:
                self['state_cov', 0, 0] = params_variance
            if self.state_regression and self.time_varying_regression:
                self.ssm[self._exog_variance_idx] = params_exog_variance

        return params

    def _get_extension_time_varying_matrices(
            self, params, exog, out_of_sample, extend_kwargs=None,
            transformed=True, includes_fixed=False, **kwargs):
        """
        Get time-varying state space system matrices for extended model

        Parameters
        ----------
        params : array_like
            Parameters used to update the extended model.
        exog : array_like or None
            Exogenous data for the extension period. It is first passed
            through `_validate_out_of_sample_exog`.
        out_of_sample : int
            Number of periods by which the sample is extended.
        extend_kwargs : dict, optional
            Additional keyword arguments passed to `clone` when the extended
            model is created. The given dictionary is not modified.
        transformed : bool, optional
            Forwarded to the extended model's `update`. Default is True.
        includes_fixed : bool, optional
            Forwarded to the extended model's `update`. Default is False.
        **kwargs
            System matrices already supplied by the caller, keyed by name.
            Names present here are left untouched.

        Returns
        -------
        kwargs : dict
            The given `kwargs`, with the last `out_of_sample` time slices of
            each relevant system matrix of the extended model added.

        Notes
        -----
        We need to override this method for SARIMAX because we need some
        special handling in the `simple_differencing=True` case.
        """

        # Get the appropriate exog for the extended sample
        exog = self._validate_out_of_sample_exog(exog, out_of_sample)

        # Get the tmp endog, exog
        if self.simple_differencing:
            # Here the extended model is built over the original sample plus
            # the extension, so the original exog is stacked on top of the
            # new exog.
            nobs = self.data.orig_endog.shape[0] + out_of_sample
            tmp_endog = np.zeros((nobs, self.k_endog))
            if exog is not None:
                tmp_exog = np.c_[self.data.orig_exog.T, exog.T].T
            else:
                tmp_exog = None
        else:
            tmp_endog = np.zeros((out_of_sample, self.k_endog))
            tmp_exog = exog

        # Create extended model. Work on a copy of `extend_kwargs` so that the
        # defaults set below do not leak into the caller's dictionary.
        extend_kwargs = {} if extend_kwargs is None else dict(extend_kwargs)
        if not self.simple_differencing and self.k_trend > 0:
            # Continue the time trend where the current sample ends
            extend_kwargs.setdefault(
                'trend_offset', self.trend_offset + self.nobs)
        extend_kwargs.setdefault('validate_specification', False)
        mod_extend = self.clone(
            endog=tmp_endog, exog=tmp_exog, **extend_kwargs)
        mod_extend.update(params, transformed=transformed,
                          includes_fixed=includes_fixed)

        # Retrieve the extensions to the time-varying system matrices and
        # put them in kwargs
        for name in self.ssm.shapes.keys():
            if name == 'obs' or name in kwargs:
                continue
            original = getattr(self.ssm, name)
            extended = getattr(mod_extend.ssm, name)
            so = original.shape[-1]
            se = extended.shape[-1]
            # Take the extension if either matrix has more than one time
            # slice, or if the original has a single slice, the model has a
            # single observation, and that slice differs from the first slice
            # of the extended matrix.
            if ((so > 1 or se > 1) or (
                    so == 1 and self.nobs == 1 and
                    np.any(original[..., 0] != extended[..., 0]))):
                kwargs[name] = extended[..., -out_of_sample:]

        return kwargs


class SARIMAXResults(MLEResults):
    """
    Class to hold results from fitting an SARIMAX model.

    Parameters
    ----------
    model : SARIMAX instance
        The fitted model instance

    Attributes
    ----------
    specification : dictionary
        Dictionary including all attributes from the SARIMAX model instance.
    polynomial_ar : ndarray
        Array containing autoregressive lag polynomial coefficients,
        ordered from lowest degree to highest. Initialized with ones, unless
        a coefficient is constrained to be zero (in which case it is zero).
    polynomial_ma : ndarray
        Array containing moving average lag polynomial coefficients,
        ordered from lowest degree to highest. Initialized with ones, unless
        a coefficient is constrained to be zero (in which case it is zero).
    polynomial_seasonal_ar : ndarray
        Array containing seasonal autoregressive lag polynomial coefficients,
        ordered from lowest degree to highest. Initialized with ones, unless
        a coefficient is constrained to be zero (in which case it is zero).
    polynomial_seasonal_ma : ndarray
        Array containing seasonal moving average lag polynomial coefficients,
        ordered from lowest degree to highest. Initialized with ones, unless
        a coefficient is constrained to be zero (in which case it is zero).
    polynomial_trend : ndarray
        Array containing trend polynomial coefficients, ordered from lowest
        degree to highest. Initialized with ones, unless a coefficient is
        constrained to be zero (in which case it is zero).
    polynomial_reduced_ar : ndarray
        Product of `polynomial_ar` and `polynomial_seasonal_ar`.
    polynomial_reduced_ma : ndarray
        Product of `polynomial_ma` and `polynomial_seasonal_ma`.
    model_orders : list of int
        The orders of each of the polynomials in the model.
    param_terms : list of str
        List of parameters actually included in the model, in sorted order.

    See Also
    --------
    statsmodels.tsa.statespace.kalman_filter.FilterResults
    statsmodels.tsa.statespace.mlemodel.MLEResults
    """
    def __init__(self, model, params, filter_results, cov_type=None,
                 **kwargs):
        super().__init__(model, params, filter_results, cov_type, **kwargs)

        self.df_resid = np.inf  # attribute required for wald tests

        # Save _init_kwds
        self._init_kwds = self.model._get_init_kwds()

        # Save model specification
        self.specification = Bunch(**{
            # Set additional model parameters
            'seasonal_periods': self.model.seasonal_periods,
            'measurement_error': self.model.measurement_error,
            'time_varying_regression': self.model.time_varying_regression,
            'simple_differencing': self.model.simple_differencing,
            'enforce_stationarity': self.model.enforce_stationarity,
            'enforce_invertibility': self.model.enforce_invertibility,
            'hamilton_representation': self.model.hamilton_representation,
            'concentrate_scale': self.model.concentrate_scale,
            'trend_offset': self.model.trend_offset,

            'order': self.model.order,
            'seasonal_order': self.model.seasonal_order,

            # Model order
            'k_diff': self.model.k_diff,
            'k_seasonal_diff': self.model.k_seasonal_diff,
            'k_ar': self.model.k_ar,
            'k_ma': self.model.k_ma,
            'k_seasonal_ar': self.model.k_seasonal_ar,
            'k_seasonal_ma': self.model.k_seasonal_ma,

            # Param Numbers
            'k_ar_params': self.model.k_ar_params,
            'k_ma_params': self.model.k_ma_params,

            # Trend / Regression
            'trend': self.model.trend,
            'k_trend': self.model.k_trend,
            'k_exog': self.model.k_exog,

            'mle_regression': self.model.mle_regression,
            'state_regression': self.model.state_regression,
        })

        # Polynomials
        self.polynomial_trend = self.model._polynomial_trend
        self.polynomial_ar = self.model._polynomial_ar
        self.polynomial_ma = self.model._polynomial_ma
        self.polynomial_seasonal_ar = self.model._polynomial_seasonal_ar
        self.polynomial_seasonal_ma = self.model._polynomial_seasonal_ma
        self.polynomial_reduced_ar = np.polymul(
            self.polynomial_ar, self.polynomial_seasonal_ar
        )
        self.polynomial_reduced_ma = np.polymul(
            self.polynomial_ma, self.polynomial_seasonal_ma
        )

        # Distinguish parameters: walk through the parameter vector term by
        # term and store each term's slice as `_params_<term>`.
        self.model_orders = self.model.model_orders
        self.param_terms = self.model.param_terms
        start = end = 0
        for name in self.param_terms:
            # For the ARMA terms the number of estimated parameters is used
            # rather than the polynomial order
            if name == 'ar':
                k = self.model.k_ar_params
            elif name == 'ma':
                k = self.model.k_ma_params
            elif name == 'seasonal_ar':
                k = self.model.k_seasonal_ar_params
            elif name == 'seasonal_ma':
                k = self.model.k_seasonal_ma_params
            else:
                k = self.model_orders[name]
            end += k
            setattr(self, '_params_%s' % name, self.params[start:end])
            start += k
        # GH7527, all terms must be defined
        all_terms = ['ar', 'ma', 'seasonal_ar', 'seasonal_ma', 'variance']
        for name in set(all_terms).difference(self.param_terms):
            setattr(self, '_params_%s' % name, np.empty(0))

        # Handle removing data
        self._data_attr_model.extend(['orig_endog', 'orig_exog'])

    def extend(self, endog, exog=None, **kwargs):
        """
        Extend these results to additional data

        Delegates to the parent class's `extend`, after defaulting
        `trend_offset` so that the time trend of the new model continues
        where the current sample ends.

        Parameters
        ----------
        endog : array_like
            New observations, passed to the parent `extend`.
        exog : array_like, optional
            New exogenous data, passed to the parent `extend`.
        **kwargs
            Additional keyword arguments passed to the parent `extend`. An
            explicitly given `trend_offset` takes precedence over the
            default computed here.

        Returns
        -------
        The value returned by the parent class's `extend`.
        """
        # Account for the current model's own offset, so that extending
        # results that were themselves created with a non-default
        # `trend_offset` does not restart the trend. With the default offset
        # of 1 this equals `self.nobs + 1`.
        kwargs.setdefault(
            'trend_offset', self.model.trend_offset + self.nobs)
        return super().extend(endog, exog=exog, **kwargs)

    @cache_readonly
    def arroots(self):
        """
        (array) Roots of the reduced form autoregressive lag polynomial
        """
        return np.roots(self.polynomial_reduced_ar)**-1

    @cache_readonly
    def maroots(self):
        """
        (array) Roots of the reduced form moving average lag polynomial
        """
        return np.roots(self.polynomial_reduced_ma)**-1

    @cache_readonly
    def arfreq(self):
        """
        (array) Frequency of the roots of the reduced form autoregressive
        lag polynomial. None if there are no roots.
        """
        z = self.arroots
        if not z.size:
            return
        return np.arctan2(z.imag, z.real) / (2 * np.pi)

    @cache_readonly
    def mafreq(self):
        """
        (array) Frequency of the roots of the reduced form moving average
        lag polynomial. None if there are no roots.
        """
        z = self.maroots
        if not z.size:
            return
        return np.arctan2(z.imag, z.real) / (2 * np.pi)

    @cache_readonly
    def arparams(self):
        """
        (array) Autoregressive parameters actually estimated in the model.
        Does not include seasonal autoregressive parameters (see
        `seasonalarparams`) or parameters whose values are constrained to be
        zero.
        """
        return self._params_ar

    @cache_readonly
    def seasonalarparams(self):
        """
        (array) Seasonal autoregressive parameters actually estimated in the
        model. Does not include nonseasonal autoregressive parameters (see
        `arparams`) or parameters whose values are constrained to be zero.
        """
        return self._params_seasonal_ar

    @cache_readonly
    def maparams(self):
        """
        (array) Moving average parameters actually estimated in the model.
        Does not include seasonal moving average parameters (see
        `seasonalmaparams`) or parameters whose values are constrained to be
        zero.
        """
        return self._params_ma

    @cache_readonly
    def seasonalmaparams(self):
        """
        (array) Seasonal moving average parameters actually estimated in the
        model. Does not include nonseasonal moving average parameters (see
        `maparams`) or parameters whose values are constrained to be zero.
        """
        return self._params_seasonal_ma

    @Appender(MLEResults.summary.__doc__)
    def summary(self, alpha=.05, start=None):
        # Create the model name, e.g. "SARIMAX(1, 1, 1)x(1, 0, 1, 12)". An
        # order is shown as an integer when every lag up to that order is
        # included, and as the list of included lags otherwise.

        # See if we have an ARIMA component
        order = ''
        if self.model.k_ar + self.model.k_diff + self.model.k_ma > 0:
            if self.model.k_ar == self.model.k_ar_params:
                order_ar = self.model.k_ar
            else:
                order_ar = list(self.model._spec.ar_lags)
            if self.model.k_ma == self.model.k_ma_params:
                order_ma = self.model.k_ma
            else:
                order_ma = list(self.model._spec.ma_lags)
            # If there is simple differencing, then that is reflected in the
            # dependent variable name
            k_diff = 0 if self.model.simple_differencing else self.model.k_diff
            order = '(%s, %d, %s)' % (order_ar, k_diff, order_ma)
        # See if we have an SARIMA component
        seasonal_order = ''
        has_seasonal = (
            self.model.k_seasonal_ar +
            self.model.k_seasonal_diff +
            self.model.k_seasonal_ma
        ) > 0
        if has_seasonal:
            # `k_seasonal_ar` / `k_seasonal_ma` are divided by the seasonal
            # period before being compared with the number of estimated
            # seasonal parameters
            tmp = int(self.model.k_seasonal_ar / self.model.seasonal_periods)
            if tmp == self.model.k_seasonal_ar_params:
                order_seasonal_ar = tmp
            else:
                order_seasonal_ar = list(self.model._spec.seasonal_ar_lags)
            tmp = int(self.model.k_seasonal_ma / self.model.seasonal_periods)
            # Compare against the number of *seasonal* MA parameters; using
            # the non-seasonal count here would make the displayed seasonal
            # MA order depend on the non-seasonal MA specification
            if tmp == self.model.k_seasonal_ma_params:
                order_seasonal_ma = tmp
            else:
                order_seasonal_ma = list(self.model._spec.seasonal_ma_lags)
            # If there is simple differencing, then that is reflected in the
            # dependent variable name
            k_seasonal_diff = self.model.k_seasonal_diff
            if self.model.simple_differencing:
                k_seasonal_diff = 0
            seasonal_order = ('(%s, %d, %s, %d)' %
                              (str(order_seasonal_ar), k_seasonal_diff,
                               str(order_seasonal_ma),
                               self.model.seasonal_periods))
            if not order == '':
                order += 'x'
        model_name = f'{self.model.__class__.__name__}{order}{seasonal_order}'

        return super().summary(
            alpha=alpha,
            start=start,
            title='SARIMAX Results',
            model_name=model_name
        )


class SARIMAXResultsWrapper(MLEResultsWrapper):
    # SARIMAX declares no additional attributes or methods of its own to
    # wrap, so both local specifications are empty.
    _attrs = {}
    _methods = {}

    # Combine the parent wrapper's specifications with the local ones.
    _wrap_attrs = wrap.union_dicts(
        MLEResultsWrapper._wrap_attrs, _attrs)
    _wrap_methods = wrap.union_dicts(
        MLEResultsWrapper._wrap_methods, _methods)


# Associate the wrapper class with the results class it wraps.
wrap.populate_wrapper(SARIMAXResultsWrapper, SARIMAXResults)