
    M/Ph                     T   d Z ddlmZ ddlmZ ddlZddlZddl	m
Z
 ddlmZmZ ddlmZmZ ddlmZmZmZmZ dd	lmZmZmZmZmZmZ dd
lmZ g dZdZ d Z!d Z" G d d          Z#d7dZ$d7dZ%	 	 d8dZ&	 	 d9dZ' edd          d:dddddd            Z( edd          d;d            Z) edd          d:d            Z*d ej+        d!e,d"dfd#Z-d<d$Z.d% Z/	 	 d=d'Z0 ed(d          	 	 d>d+            Z1d?d,Z2	 	 d@d.Z3dAd/Z4d0 Z5 ed1d          	 	 dBd4            Z6d5 Z7dCd6Z8dS )Da  
Various Statistical Tests

Author: josef-pktd
License: BSD-3

Notes
-----
Almost fully verified against R or Gretl, not all options are the same.
In many cases of Lagrange multiplier tests both the LM test and the F test is
returned. In some but not all cases, R has the option to choose the test
statistic. Some alternative test statistic results have not been verified.

TODO
* refactor to store intermediate results

missing:

* pvalues for breaks_hansen
* additional options, compare with R, check where ddof is appropriate
* new tests:
  - breaks_ap, more recent breaks tests
  - specification tests against nonparametric alternatives
    )deprecate_kwarg)IterableN)stats)OLSRegressionResultsWrapper)anderson_statistic	normal_ad)kstest_exponential
kstest_fitkstest_normal
lilliefors)
array_like	bool_like	dict_like
float_likeint_likestring_like)lagmat)r   r   r   r
   r	   compare_cox	compare_jacorr_breusch_godfreyacorr_ljungboxacorr_lmhet_archhet_breuschpaganhet_goldfeldquandt	het_white
spec_white	linear_lmlinear_rainbowlinear_harvey_collierr   z_The exog in results_x and in results_z are nested. {test} requires that models are non-nested.
c                    | j         d         |j         d         k    rdS t          j                            || d          d         }| ||z  z
  }t          j                            t          j        ||f                   |j         d         k    S )a  
    Check if a larger exog nests a smaller exog

    Parameters
    ----------
    small : ndarray
        exog from smaller model
    large : ndarray
        exog from larger model

    Returns
    -------
    bool
        True if small is nested by large
       FNrcondr   )shapenplinalglstsqmatrix_rankc_)smalllargecoeferrs       \/var/www/html/test/jupyter/venv/lib/python3.11/site-packages/statsmodels/stats/diagnostic.py_check_nested_exogr1   @   sx    " {1~A&&u9??5%t?44Q7D
%$,
C9  ucz!233u{1~EE    c                    t          | t                    st          d          t          |t                    st          d          t          j        | j        j        |j        j                  st          d          | j        j        }|j        j        }d}|j	        d         |j	        d         k    r|pt          ||          }n|pt          ||          }|S )Nz2results_x must come from a linear regression modelz2results_z must come from a linear regression modelz/endogenous variables in models are not the sameFr#   )
isinstancer   	TypeErrorr'   allclosemodelendog
ValueErrorexogr&   r1   )	results_x	results_zxznesteds        r0   _check_nested_resultsr@   X   s    i!9:: NLMMMi!9:: NLMMM;y,io.CDD LJKKKAAFwqzQWQZ3-a333-a33Mr2   c                       e Zd Zd ZdS )ResultsStorec                 8    t          | d| j        j                  S )N_str)getattr	__class____name__)selfs    r0   __str__zResultsStore.__str__l   s    tVT^%<===r2   N)rG   
__module____qualname__rI    r2   r0   rB   rB   k   s#        > > > > >r2   rB   Fc                    t          | |          r(t          t                              d                    | j        j        }|j        j        }| j        j        j        d         }| j        |z  }|j        |z  }| j	        }t          ||                                          }	|	j        }
t          |
|                                          }|j        }|t          j        |
j        |
          |z  z   }|dz  t          j        |          t          j        |          z
  z  }|t          j        |j        |          z  |dz  z  }|t          j        |          z  }dt$          j                            t          j        |                    z  }|rNt-                      }|	|_        ||_        ||_        ||_        ||_        ||_        t$          j        |_        |||fS ||fS )ad  
    Compute the Cox test for non-nested models

    Parameters
    ----------
    results_x : Result instance
        result instance of first model
    results_z : Result instance
        result instance of second model
    store : bool, default False
        If true, then the intermediate results are returned.

    Returns
    -------
    tstat : float
        t statistic for the test that including the fitted values of the
        first model in the second model has no effect.
    pvalue : float
        two-sided pvalue for the t statistic
    res_store : ResultsStore, optional
        Intermediate results. Returned if store is True.

    Notes
    -----
    Tests of non-nested hypothesis might not provide unambiguous answers.
    The test should be performed in both directions and it is possible
    that both or neither test rejects. see [1]_ for more information.

    Formulas from [1]_, section 8.3.4 translated to code

    Matches results for Example 8.3 in Greene

    References
    ----------
    .. [1] Greene, W. H. Econometric Analysis. New Jersey. Prentice Hall;
       5th edition. (2002).
    zCox comparisontestr          @   )r@   r9   NESTED_ERRORformatr7   r:   r8   r&   ssrfittedvaluesr   fitresidr'   dotTlogsqrtr   normsfabsrB   res_dxres_xzxc01v01qpvaluedist)r;   r<   storer=   r>   nobssigma2_xsigma2_zyhat_xr_   err_zxr`   err_xzx	sigma2_zxra   rb   rc   pvalress                      r0   r   r   p   s   L Y	22 E,,2B,CCDDDAA? &q)D}t#H}t#H#F^^!!F\F&!nn  ""GmG26&(F33d::I
)rvh''"&*;*;;
<C
RVGIw//
/)q.
@CbgcllAuz}}RVAYY'''D 	nn

:$|d7Nr2   c                    t          | |          r(t          t                              d                    | j        j        }|j        j        }| j        }t          |t          j
        ||f                                                    }|j        d         }|j        d         }|rFt                      }	||	_        t!          j        |j                  |	_        ||	_        ||	_        |||	fS ||fS )af  
    Compute the J-test for non-nested models

    Parameters
    ----------
    results_x : RegressionResults
        The result instance of first model.
    results_z : RegressionResults
        The result instance of second model.
    store : bool, default False
        If true, then the intermediate results are returned.

    Returns
    -------
    tstat : float
        t statistic for the test that including the fitted values of the
        first model in the second model has no effect.
    pvalue : float
        two-sided pvalue for the t statistic
    res_store : ResultsStore, optional
        Intermediate results. Returned if store is True.

    Notes
    -----
    From description in Greene, section 8.3.3. Matches results for Example
    8.3, Greene.

    Tests of non-nested hypothesis might not provide unambiguous answers.
    The test should be performed in both directions and it is possible
    that both or neither test rejects. see Greene for more information.

    References
    ----------
    .. [1] Greene, W. H. Econometric Analysis. New Jersey. Prentice Hall;
       5th edition. (2002).
    zJ comparisonrN   r   )r@   r9   rR   rS   r7   r8   r:   rU   r   r'   column_stackrV   tvaluespvaluesrB   res_zxr   tdf_residre   teststatrd   )
r;   r<   rf   yr>   rj   rt   tstatrn   ro   s
             r0   r   r      s    L Y	22 C,,.,AABBBAA#FBOVQK00115577FN1E>!D  nn
76?++
dC$;r2   	nonrobustc                 6   t          | |          r(t          t                              d                    | j        j        }| j        j        }|j        j        }d } ||||||          } ||||||          }	t          j        ||	gddgg d          S )a  
    Davidson-MacKinnon encompassing test for comparing non-nested models

    Parameters
    ----------
    results_x : Result instance
        result instance of first model
    results_z : Result instance
        result instance of second model
    cov_type : str, default "nonrobust
        Covariance type. The default is "nonrobust` which uses the classic
        OLS covariance estimator. Specify one of "HC0", "HC1", "HC2", "HC3"
        to use White's covariance estimator. All covariance types supported
        by ``OLS.fit`` are accepted.
    cov_kwargs : dict, default None
        Dictionary of covariance options passed to ``OLS.fit``. See OLS.fit
        for more details.

    Returns
    -------
    DataFrame
        A DataFrame with two rows and four columns. The row labeled x
        contains results for the null that the model contained in
        results_x is equivalent to the encompassing model. The results in
        the row labeled z correspond to the test that the model contained
        in results_z are equivalent to the encompassing model. The columns
        are the test statistic, its p-value, and the numerator and
        denominator degrees of freedom. The test statistic has an F
        distribution. The numerator degree of freedom is the number of
        variables in the encompassing model that are not in the x or z model.
        The denominator degree of freedom is the number of observations minus
        the number of variables in the nesting model.

    Notes
    -----
    The null is that the fit produced using x is the same as the fit
    produced using both x and z. When testing whether x is encompassed,
    the model estimated is

    .. math::

        Y = X\beta + Z_1\gamma + \epsilon

    where :math:`Z_1` are the columns of :math:`Z` that are not spanned by
    :math:`X`. The null is :math:`H_0:\gamma=0`. When testing whether z is
    encompassed, the roles of :math:`X` and :math:`Z` are reversed.

    Implementation of  Davidson and MacKinnon (1993)'s encompassing test.
    Performs two Wald tests where models x and z are compared to a model
    that nests the two. The Wald tests are performed by using an OLS
    regression.
    zTesting encompassingrN   c                 P   ||t           j                            ||d           d         z  z
  }t           j                            |          \  }}}t          j        t           j                  j        }	|                    dd          t          |j                  z  |	z  }
t          j	        |          |
k    }||d d |f         z  }t          j
        ||g          }|j        d         }|j        d         }t          | |                              ||          }t          j        ||f          }t          j        |          |d d | d f<   |                    |dd          }|j        |j        }}t%          |j                  t%          |j                  }}||||fS )	Nr$   r   T)axiskeepdimsr#   )cov_typecov_kwdsuse_fscalar)r'   r(   r)   svdfinfodoubleepsmaxr&   r^   hstackr   rV   zeroseye	wald_test	statisticrd   intdf_numdf_denom)r8   abcov_estr   r/   usvr   tolnon_zeroaugaug_regk_akro   r_matrixrO   statrd   r   r   s                          r0   _test_nestedz*compare_encompassing.<locals>._test_nested-  st   !biooa$o77:::)--$$1ahry!!%eede++c#)nn<sB6!99s?AaaakN")QH%%ilM!%!!%%w%JJ8S!H%%VC[[SDEE}}XT$}??~t{ft{++S-?-?VVX--r2   r=   r>   )r   rd   r   r   )indexcolumns)	r@   r9   rR   rS   r7   r8   r:   pd	DataFrame)
r;   r<   r   
cov_kwargsrx   r=   r>   r   x_nestedz_nesteds
             r0   compare_encompassingr      s    l Y	22 K,,2H,IIJJJAAA. . .& |Aq!Xz::H|Aq!Xz::H<8,"Cj H H HJ J J Jr2   Tc           
      d   ddl m} t          | d          } t          |dd          }t          |dd          }||d
k    rt	          d          |dk     rt	          d          | j        d         }|r|d
z
  }	 || |	d          }
|sH||dz   z  t          j        |
d
|	d
z            dz  |t          j        d
|	d
z             z
  z            z  }n%|t          j        |
d
|	d
z            dz            z  }d}t          j	        |t          j
        |          z            }t          j        |
                                          t          j	        |          z  }||k    r.|t          j        d
|          t          j
        |          z  z
  }n|dt          j        d
|          z  z
  }t          j        |          }t          d
|          }t          |d          }t          j        d
|d
z             }n|4t          j        d
t          |dz  d|z            d
z   t                    }np|1t          j        d
t          |dz  d          d
z   t                    }n=t!          |t"                    s(t          |d          }t          j        d
|d
z             }t          |dd          }|                                }	 || |	d          }
|
d
|	d
z            dz  |t          j        d
|	d
z             z
  z  }||dz   z  t          j        |          |d
z
           z  }||z
  }t          j        |t          j                  }|dk    }t(          j                            ||         ||                   ||<   |st/          j        ||d|          S |t          j        |
d
|	d
z            dz            |d
z
           z  }t          j        |t          j                  }t(          j                            ||         ||                   ||<   t/          j        ||||d|          S )a  
    Ljung-Box test of autocorrelation in residuals.

    Parameters
    ----------
    x : array_like
        The data series. The data is demeaned before the test statistic is
        computed.
    lags : {int, array_like}, default None
        If lags is an integer then this is taken to be the largest lag
        that is included, the test result is reported for all smaller lag
        length. If lags is a list or array, then all lags are included up to
        the largest lag in the list, however only the tests for the lags in
        the list are reported. If lags is None, then the default maxlag is
        min(10, nobs // 5). The default number of lags changes if period
        is set.
    boxpierce : bool, default False
        If true, then additional to the results of the Ljung-Box test also the
        Box-Pierce test results are returned.
    model_df : int, default 0
        Number of degrees of freedom consumed by the model. In an ARMA model,
        this value is usually p+q where p is the AR order and q is the MA
        order. This value is subtracted from the degrees-of-freedom used in
        the test so that the adjusted dof for the statistics are
        lags - model_df. If lags - model_df <= 0, then NaN is returned.
    period : int, default None
        The period of a Seasonal time series.  Used to compute the max lag
        for seasonal data which uses min(2*period, nobs // 5) if set. If None,
        then the default rule is used to set the number of lags. When set, must
        be >= 2.
    auto_lag : bool, default False
        Flag indicating whether to automatically determine the optimal lag
        length based on threshold of maximum correlation value.

    Returns
    -------
    DataFrame
        Frame with columns:

        * lb_stat - The Ljung-Box test statistic.
        * lb_pvalue - The p-value based on chi-square distribution. The
          p-value is computed as 1 - chi2.cdf(lb_stat, dof) where dof is
          lag - model_df. If lag - model_df <= 0, then NaN is returned for
          the pvalue.
        * bp_stat - The Box-Pierce test statistic.
        * bp_pvalue - The p-value based for Box-Pierce test on chi-square
          distribution. The p-value is computed as 1 - chi2.cdf(bp_stat, dof)
          where dof is lag - model_df. If lag - model_df <= 0, then NaN is
          returned for the pvalue.

    See Also
    --------
    statsmodels.regression.linear_model.OLS.fit
        Regression model fitting.
    statsmodels.regression.linear_model.RegressionResults
        Results from linear regression models.
    statsmodels.stats.stattools.q_stat
        Ljung-Box test statistic computed from estimated
        autocorrelations.

    Notes
    -----
    Ljung-Box and Box-Pierce statistic differ in their scaling of the
    autocorrelation function. Ljung-Box test is has better finite-sample
    properties.

    References
    ----------
    .. [*] Green, W. "Econometric Analysis," 5th ed., Pearson, 2003.
    .. [*] J. Carlos Escanciano, Ignacio N. Lobato
          "An automatic Portmanteau test for serial correlation".,
          Volume 151, 2009.

    Examples
    --------
    >>> import statsmodels.api as sm
    >>> data = sm.datasets.sunspots.load_pandas().data
    >>> res = sm.tsa.ARMA(data["SUNACTIVITY"], (1,1)).fit(disp=-1)
    >>> sm.stats.acorr_ljungbox(res.resid, lags=[10], return_df=True)
           lb_stat     lb_pvalue
    10  214.106992  1.827374e-40
    r   )acfr=   periodToptionalmodel_dfFNr#   zperiod must be >= 2zmodel_df must be >= 0)nlagsfftrQ   g333333@lags   dtype
   r   )lb_stat	lb_pvalue)r   )r   r   bp_stat	bp_pvalue)statsmodels.tsa.stattoolsr   r   r   r9   r&   r'   cumsumaranger[   rZ   r^   r   argmaxminr   r4   r   	full_likenanr   chi2r]   r   r   )r=   r   	boxpiercer   r   	return_dfauto_lagr   rg   maxlagsacfq_sacfrc   	thresholdthreshold_metricsacf2	qljungboxadj_lagsrn   loc
qboxpiercepvalbps                         r0   r   r   G  sP   j .-----1cAfh666F*u===Hfkk.///!||011171:D #& s1F... 	?dQh'iQvz\ 2a 7#'")Avz*B*B#B!D E EEFF BId1VaZ<&8A&=>>>F GAt,--	6$<<++--= 	))ryD11BF4LL@AFFq29Q#5#556F y  1d||f%%yD1H%%		yC	1v:66:#FFF	yC	2..2#>>>h'' &f%%yD1H%%dF%000DXXZZF 3qE***D6A:!#tbi6A:.F.F'FGEq!BIe$4$4TAX$>>IhH<	26**D
Q,C
inhsm<<DI (|	EE"&( ( ( 	( 	$q!|"4"9::4!8DDJ\)RV,,F*--
3#??F3K<ID$.VE E"$ $ $ $r2   r   r   )r   ddofr   r   c                   t          | dd          } t          |d          }|i n|}t          |d          }| j        d         }||t	          |dz  d	|z            }n|t	          d
|dz            }n|}t          | dddf         |d          }	|	j        d         }t          j        t          j        |df          |	f         }	| | d         }
t                      }|}t          |
|	ddd|dz   f                                       ||          }t          |j                  }t          |j                  }|dk    r.||z
  |j        z  }t           j                            ||          }n|t          j        t          j        |df          t          j        |          f          }|                    |dd          }t          |j                  }t          |j                  }|r||_        ||_        |||||fS ||||fS )a  
    Lagrange Multiplier tests for autocorrelation.

    This is a generic Lagrange Multiplier test for autocorrelation. Returns
    Engle's ARCH test if resid is the squared residual array. Breusch-Godfrey
    is a variation on this test with additional exogenous variables.

    Parameters
    ----------
    resid : array_like
        Time series to test.
    nlags : int, default None
        Highest lag to use.
    store : bool, default False
        If true then the intermediate results are also returned.
    period : int, default none
        The period of a Seasonal time series.  Used to compute the max lag
        for seasonal data which uses min(2*period, nobs // 5) if set. If None,
        then the default rule is used to set the number of lags. When set, must
        be >= 2.
    ddof : int, default 0
        The number of degrees of freedom consumed by the model used to
        produce resid. The default value is 0.
    cov_type : str, default "nonrobust"
        Covariance type. The default is "nonrobust` which uses the classic
        OLS covariance estimator. Specify one of "HC0", "HC1", "HC2", "HC3"
        to use White's covariance estimator. All covariance types supported
        by ``OLS.fit`` are accepted.
    cov_kwargs : dict, default None
        Dictionary of covariance options passed to ``OLS.fit``. See OLS.fit for
        more details.

    Returns
    -------
    lm : float
        Lagrange multiplier test statistic.
    lmpval : float
        The p-value for Lagrange multiplier test.
    fval : float
        The f statistic of the F test, alternative version of the same
        test based on F test for the parameter restriction.
    fpval : float
        The pvalue of the F test.
    res_store : ResultsStore, optional
        Intermediate results. Only returned if store=True.

    See Also
    --------
    het_arch
        Conditional heteroskedasticity testing.
    acorr_breusch_godfrey
        Breusch-Godfrey test for serial correlation.
    acorr_ljung_box
        Ljung-Box test for serial correlation.

    Notes
    -----
    The test statistic is computed as (nobs - ddof) * r2 where r2 is the
    R-squared from a regression on the residual on nlags lags of the
    residual.
    rW   r#   ndimr   Nr   r   r   rQ   r   bothtrimr   r   rz   FTr   )r   r   r   r&   r   r   r'   r+   onesrB   r   rV   floatfvaluef_pvaluersquaredr   r   r]   r   r   r   r   r   rd   resolsusedlag)rW   r   rf   r   r   r   r   rg   r   xdallxshort	res_storer   r   fvalfpvallmlmpvalr   	test_stats                       r0   r   r     s'   @ ugA...E8Z00H!)zJ:|44J;q>DemTQYF
++	R##5D>6777E;q>DE"'4)$$e+,ED566]FIGqqq,7Q;,/0044h@J 5 L LFD&/""E;TkV_,r7++ 9bh|44bfWooFGG$$XU4$HH	9&''y'(( '!	#	64	1164&&r2   c                 .    t          | dz  |||          S )aL  
    Engle's Test for Autoregressive Conditional Heteroscedasticity (ARCH).

    Parameters
    ----------
    resid : ndarray
        residuals from an estimation, or time series
    nlags : int, default None
        Highest lag to use.
    store : bool, default False
        If true then the intermediate results are also returned
    ddof : int, default 0
        If the residuals are from a regression, or ARMA estimation, then there
        are recommendations to correct the degrees of freedom by the number
        of parameters that have been estimated, for example ddof=p+q for an
        ARMA(p,q).

    Returns
    -------
    lm : float
        Lagrange multiplier test statistic
    lmpval : float
        p-value for Lagrange multiplier test
    fval : float
        fstatistic for F test, alternative version of the same test based on
        F test for the parameter restriction
    fpval : float
        pvalue for F test
    res_store : ResultsStore, optional
        Intermediate results. Returned if store is True.

    Notes
    -----
    verified against R:FinTS::ArchTest
    rQ   )r   rf   r   )r   )rW   r   rf   r   s       r0   r   r   K  s     J EQJe5tDDDDr2   resultsro   c                 &   t          j        | j                                                  }|j        dk    rt          d          | j        j        }|j        d         }|t          d|dz            }t          j
        t          j        |          |f          }t          |dddf         |d          }|j        d         }t           j        t          j        |df          |f         }|| d         }||}nt          j        ||f          }|j        d         }	t!          ||                                          }
|
                    t          j        ||	|	|z
                      }|j        }|j        }t-          t          j        |                    }t-          t          j        |                    }||
j        z  }t0          j                            ||          }|r#t7                      }|
|_        ||_        |||||fS ||||fS )	a  
    Breusch-Godfrey Lagrange Multiplier tests for residual autocorrelation.

    Parameters
    ----------
    res : RegressionResults
        Estimation results for which the residuals are tested for serial
        correlation.
    nlags : int, optional
        Number of lags to include in the auxiliary regression. (nlags is
        highest lag).
    store : bool, default False
        If store is true, then an additional class instance that contains
        intermediate results is returned.

    Returns
    -------
    lm : float
        Lagrange multiplier test statistic.
    lmpval : float
        The p-value for Lagrange multiplier test.
    fval : float
        The value of the f statistic for F test, alternative version of the
        same test based on F test for the parameter restriction.
    fpval : float
        The pvalue for F test.
    res_store : ResultsStore
        A class instance that holds intermediate results. Only returned if
        store=True.

    Notes
    -----
    BG adds lags of residual to exog in the design matrix for the auxiliary
    regression with residuals as endog. See [1]_, section 12.7.1.

    References
    ----------
    .. [1] Greene, W. H. Econometric Analysis. New Jersey. Prentice Hall;
      5th edition. (2002).
    r#   zFModel resid must be a 1d array. Cannot be used on multivariate models.r   Nr   r   r   r   )r'   asarrayrW   squeezer   r9   r7   r:   r&   r   concatenater   r   r+   r   rq   r   rV   f_testr   r   rd   r   r   r   r   r]   rB   r   r   )ro   r   rf   r=   exog_oldrg   r   r   r:   k_varsr   ftr   r   r   r   r   s                    r0   r   r   s  s   V 	
39%%''Av{{ 1 2 2 	2y~H71:D}B	""
+,,A1QQQW:u6222E;q>DE"'4)$$e+,EuvvYF%011Z]F""$$F	rveVVe^<<	=	=B9DIED!!""D"*U##$$E		BZ]]2u%%F  ' NN	!	!	64	1164&&r2   r=   	test_namereturnc                     |                      d          }t          j        ||                     d          z
  dk    |dk    z            r| j        d         dk     rt          | d          dS )z
    Check validity of the exogenous regressors in a heteroskedasticity test

    Parameters
    ----------
    x : ndarray
        The exogenous regressor array
    test_name : str
        The test name for the exception
    r   r~   r#   rQ   zI test requires exog to have at least two columns where one is a constant.N)r   r'   anyr   r&   r9   )r=   r   x_maxs      r0   _check_het_testr     s     EEqEMMEFUQUUU]]*q0UaZ@AA
71:>> 3 3 3
 
 	
 >r2   c                    t          |dd          }t          |d           t          | dd          dz  }|s|t          j        |          z  }|j        \  }}t          ||                                          }|j        }|j        }	|r
||j	        z  n	|j
        dz  }
|
t          j                            |
|dz
            ||	fS )u	  
    Breusch-Pagan Lagrange Multiplier test for heteroscedasticity

    The tests the hypothesis that the residual variance does not depend on
    the variables in x in the form

    .. :math: \sigma_i = \sigma * f(\alpha_0 + \alpha z_i)

    Homoscedasticity implies that :math:`\alpha=0`.

    Parameters
    ----------
    resid : array_like
        For the Breusch-Pagan test, this should be the residual of a
        regression. If an array is given in exog, then the residuals are
        calculated by the an OLS regression or resid on exog. In this case
        resid should contain the dependent variable. Exog can be the same as x.
    exog_het : array_like
        This contains variables suspected of being related to
        heteroscedasticity in resid.
    robust : bool, default True
        Flag indicating whether to use the Koenker version of the
        test (default) which assumes independent and identically distributed
        error terms, or the original Breusch-Pagan version which assumes
        residuals are normally distributed.

    Returns
    -------
    lm : float
        lagrange multiplier statistic
    lm_pvalue : float
        p-value of lagrange multiplier test
    fvalue : float
        f-statistic of the hypothesis that the error variance does not depend
        on x
    f_pvalue : float
        p-value for the f-statistic

    Notes
    -----
    Assumes x contains constant (for counting dof and calculation of R^2).
    In the general description of LM test, Greene mentions that this test
    exaggerates the significance of results in small or moderately large
    samples. In this case the F-statistic is preferable.

    **Verification**

    Chisquare test statistic is exactly (<1e-13) the same result as bptest
    in R-stats with defaults (studentize=True).

    **Implementation**

    This is calculated using the generic formula for LM test using $R^2$
    (Greene, section 17.6) and not with the explicit formula
    (Greene, section 11.4.3), unless `robust` is set to False.
    The degrees of freedom for the p-value assume x is full rank.

    References
    ----------
    .. [1] Greene, W. H. Econometric Analysis. New Jersey. Prentice Hall;
       5th edition. (2002).
    .. [2]  Breusch, T. S.; Pagan, A. R. (1979). "A Simple Test for
       Heteroskedasticity and Random Coefficient Variation". Econometrica.
       47 (5): 1287–1294.
    .. [3] Koenker, R. (1981). "A note on studentizing a test for
       heteroskedasticity". Journal of Econometrics 17 (1): 107–112.
    exog_hetrQ   r   zThe Breusch-PaganrW   r#   )r   r   r'   meanr&   r   rV   r   r   r   essr   r   r]   )rW   r   robustr=   rx   rg   nvarsr   r   r   r   s              r0   r   r     s    H 	8Za000AA*+++5'***a/A 

N'KD%AYY]]__F=DOE#)	=		vzA~Buz}}R++T588r2   c                 `   t          |dd          }t          | dd|j        d         df          }t          |d           |j        \  }}t          j        |          \  }}|d	d	|f         |d	d	|f         z  }|j        \  }}|||dz
  z  d
z  |z   k    sJ t          |dz  |                                          }	|	j        }
|	j        }||	j	        z  }|	j
        t          j                            |          dz
  k    sJ t          j                            ||	j
                  }|||
|fS )a  
    White's Lagrange Multiplier Test for Heteroscedasticity.

    Parameters
    ----------
    resid : array_like
        The residuals. The squared residuals are used as the endogenous
        variable.
    exog : array_like
        The explanatory variables for the variance. Squares and interaction
        terms are automatically included in the auxiliary regression.

    Returns
    -------
    lm : float
        The lagrange multiplier statistic.
    lm_pvalue :float
        The p-value of lagrange multiplier test.
    fvalue : float
        The f-statistic of the hypothesis that the error variance does not
        depend on x. This is an alternative test variant not the original
        LM test.
    f_pvalue : float
        The p-value for the f-statistic.

    Notes
    -----
    Assumes x contains constant (for counting dof).

    question: does f-statistic make sense? constant ?

    References
    ----------
    Greene section 11.4.1 5th edition p. 222. Test statistic reproduces
    Greene 5th, example 11.3.
    r:   rQ   r   rW   r   r#   )r   r&   zWhite's heteroskedasticityNrP   )r   r&   r   r'   triu_indicesr   rV   r   r   r   df_modelr(   r*   r   r   r]   )rW   r:   r=   rx   rg   nvars0i0i1r   r   r   r   r   r   s                 r0   r   r   .  sB   J 	4a(((A5'!'!*aAAAAA34447LD&_V$$FBQQQU8a2hD*KD%Ffqj)B.77777a""$$F=DOE		B
 ?bi33D99A=====Z]]2v//FvtU""r2   
increasingc                    t          j        |          }t          j        |           } |j        \  }}||dz  }nd|k     r|dk     rt          ||z            }||}	n'd|k     r|dk     r|t          ||z            z   }	n||z   }	|2t          j        |dd|f                   }
| |
         } ||
ddf         }t          | d|         |d|                                                   }t          | |	d         ||	d                                                   }|j        |j        z  }|                                dv r.t          j
                            ||j        |j                  }d}n|                                dv r1t          j
                            d|z  |j        |j                  }d	}n|                                d
v rlt          j
                            ||j        |j                  }t          j
                            ||j        |j                  }dt          ||          z  }d}nt          d          |rtt!                      }d|_        ||_        ||_        |j        |j        f|_        ||_        ||_        ||_        ||_        d                    |||          |_        ||||fS |||fS )ak  
    Goldfeld-Quandt homoskedasticity test.

    This test examines whether the residual variance is the same in 2
    subsamples.

    Parameters
    ----------
    y : array_like
        endogenous variable
    x : array_like
        exogenous variable, regressors
    idx : int, default None
        column index of variable according to which observations are
        sorted for the split
    split : {int, float}, default None
        If an integer, this is the index at which sample is split.
        If a float in 0<split<1 then split is interpreted as fraction
        of the observations in the first sample. If None, uses nobs//2.
    drop : {int, float}, default None
        If this is not None, then observation are dropped from the middle
        part of the sorted series. If 0<split<1 then split is interpreted
        as fraction of the number of observations to be dropped.
        Note: Currently, observations are dropped between split and
        split+drop, where split and drop are the indices (given by rounding
        if specified as fraction). The first sample is [0:split], the
        second sample is [split+drop:]
    alternative : {"increasing", "decreasing", "two-sided"}
        The default is increasing. This specifies the alternative for the
        p-value calculation.
    store : bool, default False
        Flag indicating to return the regression results

    Returns
    -------
    fval : float
        value of the F-statistic
    pval : float
        p-value of the hypothesis that the variance in one subsample is
        larger than in the other subsample
    ordering : str
        The ordering used in the alternative.
    res_store : ResultsStore, optional
        Storage for the intermediate and final results that are calculated

    Notes
    -----
    The Null hypothesis is that the variance in the two sub-samples are the
    same. The alternative hypothesis, can be increasing, i.e. the variance
    in the second sample is larger than in the first, or decreasing or
    two-sided.

    Results are identical R, but the drop option is defined differently.
    (sorting by idx not tested yet)
    NrQ   r   r#   )iincr  r  )ddec
decreasing      ?r
  )2z2-sided	two-sidedr  zinvalid alternativez5Test Results for Goldfeld-Quandt test ofheterogeneityzThe Goldfeld-Quandt test for null hypothesis that the variance in the second
subsample is {} than in the first subsample:
F-statistic ={:8.4f} and p-value ={:8.4f})r'   r   r&   r   argsortr   rV   	mse_residlowerr   fr]   rv   cdfr   r9   rB   __doc__r   r   df_fvalresols1resols2orderingsplitrS   rD   )rx   r=   idxr  dropalternativerf   rg   r   start2xsortindr  r  r   r   r  fpval_smfpval_laro   s                      r0   r   r   h  s   r 	
1A

1A'KD%}	
e))%!))D5L!!|
d((TD[)))
:a3i((hKhkN!FUF)QvvY''++--G!FGG*aj))--//Gw00D888

4!173CDD					 :	:	:

29g&68HII					 =	=	=7;;tW%5w7GHH7::dG$4g6FGGC(+++./// *nn&	')9:	- .4VHdE-J-J 	
 UHc))  r2   result   fittedc                    t          | t                    st          d          t          | j        j                  r*| j        j        j        d         dk    rt          d          t          |dd          }t          |dd	          }t          |d
          }t          |t                    r5|dk     rt          d          t          j        d|dz   t                    }n	 t          j        |t                    }n# t           $ r t          d          w xY w|j        dk    sCt%          t'          |                    |j        d         k    s|dk                                     rt          d          | j        j        }|dk    r%t          j        | j                  dddf         nR|dk    r| j        j        ||                    d          k    ||                    d          k    z  }|                    d          }|                                rt          d          dd| f         nddlm} || j        rnt          j        j        d                                                   }	|	                    t          | j        j        j                             dd|	f          |dt          | j                  t          | j                  d          }
|
j         ddddf         t          j!        |gfd|D             z             }| j        j"        } || j        j        j#        |          }|i n|}|$                    ||          } |j        d         |j        d         z
  }|j        d         }t          j%        ||||z
            }| &                    ||d          S )a  
    Ramsey's RESET test for neglected nonlinearity

    Parameters
    ----------
    res : RegressionResults
        A results instance from a linear regression.
    power : {int, List[int]}, default 3
        The maximum power to include in the model, if an integer. Includes
        powers 2, 3, ..., power. If an list of integers, includes all powers
        in the list.
    test_type : str, default "fitted"
        The type of augmentation to use:

        * "fitted" : (default) Augment regressors with powers of fitted values.
        * "exog" : Augment exog with powers of exog. Excludes binary
          regressors.
        * "princomp": Augment exog with powers of first principal component of
          exog.
    use_f : bool, default False
        Flag indicating whether an F-test should be used (True) or a
        chi-square test (False).
    cov_type : str, default "nonrobust
        Covariance type. The default is "nonrobust` which uses the classic
        OLS covariance estimator. Specify one of "HC0", "HC1", "HC2", "HC3"
        to use White's covariance estimator. All covariance types supported
        by ``OLS.fit`` are accepted.
    cov_kwargs : dict, default None
        Dictionary of covariance options passed to ``OLS.fit``. See OLS.fit
        for more details.

    Returns
    -------
    ContrastResults
        Test results for Ramsey's Reset test. See notes for implementation
        details.

    Notes
    -----
    The RESET test uses an augmented regression of the form

    .. math::

       Y = X\beta + Z\gamma + \epsilon

    where :math:`Z` are a set of regressors that are one of:

    * Powers of :math:`X\hat{\beta}` from the original regression.
    * Powers of :math:`X`, excluding the constant and binary regressors.
    * Powers of the first principal component of :math:`X`. If the
      model includes a constant, this column is dropped before computing
      the principal component. In either case, the principal component
      is extracted from the correlation matrix of remaining columns.

    The test is a Wald test of the null :math:`H_0:\gamma=0`. If use_f
    is True, then the quadratic-form test statistic is divided by the
    number of restrictions and the F distribution is used to compute
    the critical value.
    z/result must come from a linear regression modelr#   zjexog contains only a constant column. The RESET test requires exog to have at least 1 non-constant column.	test_type)r"  r:   princomp)optionsr   Tr   r   rQ   zpower must be >= 2r   z,power must be an integer or list of integersr   z.power must contains distinct integers all >= 2r"  Nr:   r   z+Model contains only constant or binary data)PCAnipals)ncompstandardizedemeanmethodc                     g | ]}|z  S rL   rL   ).0pr   s     r0   
<listcomp>z linear_reset.<locals>.<listcomp>E  s    ";";";3!8";";";r2   r   )r   r   )'r4   r   r5   boolr7   
k_constantr:   r&   r9   r   r   r   r   r'   r   array	Exceptionr   lensetr   r   rU   r   r   allstatsmodels.multivariate.pcar'  tolistpopdata	const_idxfactorsr   rF   r8   rV   r   r   )ro   powerr$  r   r   r   r:   binaryr'  retainpcaaug_exog	mod_classmodnrestrnparamsr_matr   s                    @r0   linear_resetrH    s   | c344 KIJJJCI !! 1cin&:1&=&B&B 0 1 1 	1 I{$BD D DI:|dCCCJeW%%E% O1991222	!UQYc222	MHU#...EE 	M 	M 	MKLLL	M:??c#e**ooQ??!! @MNNN9>DHj)**111d73	f		in4888+++a8H8H0HI##::<< 	LJKKK!!!fW*o444444> 	!Ysy|,,3355FJJs39>344555aaai.Cc#QD,@,@cn--h@ @ @k!!!RaR% y$";";";";U";";";;<<H	#I
)CIN((
3
3C!)zJ
''8
'
;
;C^AA.FnQGF67gfn555E==eD=999s   (D Dc                 n    t          | |d|          }t          j        |d         dd         d          S )au  
    Harvey Collier test for linearity

    The Null hypothesis is that the regression is correctly modeled as linear.

    Parameters
    ----------
    res : RegressionResults
        A results instance from a linear regression.
    order_by : array_like, default None
        Integer array specifying the order of the residuals. If not provided,
        the order of the residuals is not changed. If provided, must have
        the same number of observations as the endogenous variable.
    skip : int, default None
        The number of observations to use for initial OLS, if None then skip is
        set equal to the number of regressors (columns in exog).

    Returns
    -------
    tvalue : float
        The test statistic, based on ttest_1sample.
    pvalue : float
        The pvalue of the test.

    See Also
    --------
    statsmodels.stats.diadnostic.recursive_olsresiduals
        Recursive OLS residual calculation used in the test.

    Notes
    -----
    This test is a t-test that the mean of the recursive ols residuals is zero.
    Calculating the recursive residuals might take some time for large samples.
    ffffff?)skipalphaorder_byr!  Nr   )recursive_olsresidualsr   ttest_1samp)ro   rM  rK  rrs       r0   r!   r!   P  s:    L 
 $dX	N	N	NBRU122Y***r2         ?c                 N   t          | t                    st          d          t          |d          }t	          |d          }| j        }| j        j        }| j        j        }||rt          d          |t          |t          j                  rt          |ddd	          }nt          |t                    r|g}	 | j        j        j        |                                         }n$# t"          t$          f$ r t          d
          w xY wd}	|	|v r	|	dz  }	|	|v 	t          j        |j        d                   ||	<   |                    |          }t          j        ||	                   }||         }||         }|ru|t/          |          dz  n|}t          |t0                    r4d|cxk    rdk    sn t          d          t/          ||dz
  z            }n1t3          |d          }d|cxk     r	|dz
  k     sn t          d          |||dz            }
ddlm} 	 ||
z
  }t          j                            |j        |z  |z            }n\# t          j        j        $ rE ||                     d          z
  }t          j                            |j        |z  |z            }Y nw xY w |||
d|          }t          j!        |"                                          }||         }||         }t          j#        dd|z
  z  |z            $                    t.                    }t          j%        |||z  z             $                    t.                    }||z
  |j        d         k     rt          d          tM          ||          }tO          ||         ||                   (                                }|j        j        j        d         }|j)        }| j)        }||z
  ||z
  z  |z  |j*        z  }tV          j,        -                    |||z
  |j*                  }||fS )a  
    Rainbow test for linearity

    The null hypothesis is the fit of the model using full sample is the same
    as using a central subset. The alternative is that the fits are difference.
    The rainbow test has power against many different forms of nonlinearity.

    Parameters
    ----------
    res : RegressionResults
        A results instance from a linear regression.
    frac : float, default 0.5
        The fraction of the data to include in the center model.
    order_by : {ndarray, str, List[str]}, default None
        If an ndarray, the values in the array are used to sort the
        observations. If a string or a list of strings, these are interpreted
        as column name(s) which are then used to lexicographically sort the
        data.
    use_distance : bool, default False
        Flag indicating whether data should be ordered by the Mahalanobis
        distance to the center.
    center : {float, int}, default None
        If a float, the value must be in [0, 1] and the center is center *
        nobs of the ordered data.  If an integer, must be in [0, nobs) and
        is interpreted as the observation of the ordered data to use.

    Returns
    -------
    fstat : float
        The test statistic based on the F test.
    pvalue : float
        The pvalue of the test.

    Notes
    -----
    This test assumes residuals are homoskedastic and may reject a correct
    linear specification if the residuals are heteroskedastic.
    z3res must be a results instance from a linear model.fracuse_distanceNz7order_by and use_distance cannot be simultaneouslyused.rM  r#   r   )r   r   zvorder_by must contain valid column names from the exog data used to construct res,and exog must be a pandas DataFrame.	__index___r   rQ           r  z&center must be in (0, 1) when a float.centerz(center must be in [0, nobs) when an int.)cdistmahalanobis)metricVIrQ  zqfrac is too small to perform test. frac * nobsmust be greater than the number of exogenousvariables in the model.).r4   r   r5   r   r   rg   r7   r8   r:   r9   r'   ndarrayr   strr;  	orig_exogcopy
IndexErrorKeyErrorr   r&   sort_valuesr   r   r   r   scipy.spatial.distancerY  r(   invrY   LinAlgErrorr   r  ravelceilastypefloorslicer   rV   rT   rv   r   r  r]   )ro   rS  rM  rT  rX  rg   r8   r:   colsname
center_obsrY  r/   vire   r  lowidxuppidxmi_slres_minobs_miss_missfstatrn   s                            r0   r    r    {  s`   P c344 OMNNNdF##D\>::L8DIOE9>D ! " " 	"h
++ 	.!(JQeLLLHH(C(( &$:Hy~/9>>@@) H H H !G H H HH D$,, $,,4:a=11DJ##H--Dz$t*--HhH~ #)>Tavfe$$ 	M&''''C'''' !IJJJ46*++FFfh//Fv((((q(((( !KLLL&/*
000000	3#Csus{T122BBy$ 	3 	3 	31%Csus{T122BBB	3 uT:mCCCj&&c
CyWSAH%,--44S99FXftd{*++22377FA&& 3 4 4 	4 &&!!EutE{++//11Fl &q)GJE	B%ZD7N+e3foEE7::eTG^V_==D$;s   )C+ +!D&/I AJ/.J/c           	         |d }t          j        |          }t          j        | ||ddddf                   f          }|j        \  }}t	          | |                                          }|                    t          j        |dz
  |dz  dz
  |                    }||j        z  }t          j
                            ||dz
            }	||	|fS )ae  
    Lagrange multiplier test for linearity against functional alternative

    # TODO: Remove the restriction
    limitations: Assumes currently that the first column is integer.
    Currently it does not check whether the transformed variables contain NaNs,
    for example log of negative number.

    Parameters
    ----------
    resid : ndarray
        residuals of a regression
    exog : ndarray
        exogenous variables for which linearity is tested
    func : callable, default None
        If func is None, then squares are used. func needs to take an array
        of exog and return an array of transformed variables.

    Returns
    -------
    lm : float
       Lagrange multiplier test statistic
    lm_pval : float
       p-value of Lagrange multiplier tes
    ftest : ContrastResult instance
       the results from the F test variant of this test

    Notes
    -----
    Written to match Gretl's linearity test. The test runs an auxiliary
    regression of the residuals on the combined original and transformed
    regressors. The Null hypothesis is that the linear specification is
    correct.
    Nc                 ,    t          j        | d          S )NrQ   )r'   r>  )r=   s    r0   funczlinear_lm.<locals>.func  s    8Aq>>!r2   r#   rQ   )r'   r   rq   r&   r   rV   r   r   r   r   r   r]   )
rW   r:   rz  exog_auxrg   r   lsftestr   lm_pvals
             r0   r   r     s    F |	" 	" 	":dDdd4122;&7&7899H:LD&	UH			!	!	#	#BIIbfVaZ!a@@AAE		BjmmB
++Gwr2   c                 z   t          |dd          }t          | dd          }|j        d         dk     s+t          j        t          j        |d          dk              st          d          t          j        |j        d                   \  }}t          j        |d	d	|f         |d	d	|f         z  dd          }d
}d}|||                    d          z  z   }t          j	        
                    |d          }	t          j        |	                                          t          j        |          k     }
|d	d	t          j        |
           d         f         }||z  }|t          j        |          z
  }t          j        |j        |          }|t          j        |d          z
  }||d	d	d	f         z  }|j                            |          }|                    t          j	                            ||                    }|j        d         }t&          j                            ||          }|||fS )a  
    White's Two-Moment Specification Test

    Parameters
    ----------
    resid : array_like
        OLS residuals.
    exog : array_like
        OLS design matrix.

    Returns
    -------
    stat : float
        The test statistic.
    pval : float
        A chi-square p-value for test statistic.
    dof : int
        The degrees of freedom.

    See Also
    --------
    het_white
        White's test for heteroskedasticity.

    Notes
    -----
    Implements the two-moment specification test described by White's
    Theorem 2 (1980, p. 823) which compares the standard OLS covariance
    estimator with White's heteroscedasticity-consistent estimator. The
    test statistic is shown to be chi-square distributed.

    Null hypothesis is homoscedastic and correctly specified.

    Assumes the OLS design matrix contains an intercept term and at least
    one variable. The intercept is removed to calculate the test statistic.

    Interaction terms (squares and crosses of OLS regressors) are added to
    the design matrix to calculate the test statistic.

    Degrees-of-freedom (full rank) = nvar + nvar * (nvar + 1) / 2

    Linearly dependent columns are removed to avoid singular matrix error.

    References
    ----------
    .. [*] White, H. (1980). A heteroskedasticity-consistent covariance matrix
       estimator and a direct test for heteroscedasticity. Econometrica, 48:
       817-838.
    r:   rQ   r   rW   r#   r   rW  zPWhite's specification test requires at least twocolumns where one is a constant.Ng+=gvIh%<=r)moder   )r   r&   r'   r   ptpr9   r   deletevarr(   qrr^   diagonalr[   wherer   rX   rY   solver   r   r]   )rW   r:   r=   er  r  atolrtolr   r  masksqesqmndevsr  devxr   r   dofrn   s                      r0   r   r     s   d 	4a(((A5'***AwqzA~~RVBF1aLLC$788~ < = = 	= _QWQZ((FB9Qqqq"uX!!!R%(!Q//D DD
#
#C
	T$$A6!**,,"'#,,.D28TE??1%%&D a%CRWS\\!H
tvx  A"'$Q''''DHQQQWD

4A55A&&''D *Q-C:==s##Ds?r2   
olsresultsrW  rJ  c           
         t          | t                    st          d          | j        j        }| j        j        }t          |dddd|j        d         f          }|||         }||         }|j        \  }}||}t          j	        t          j
        ||f          z  }	t          j	        t          j
        |          z  }
t          j	        t          j
        |          z  }t          j	        t          j
        |          z  }|d|         }t          j                            |          |j        d         k     rd	}t          |          |d|         }t          j                            t          j        |j        |          |t          j        |          z  z             }t          j        |j        |          }t          j        ||          }||	|dz
  <   t          j        ||dz
           |          }|||dz
  <   ||dz
           |z
  |
|dz
  <   dt          j        ||dz
           t          j        |||dz
                               z   ||dz
  <   t%          ||          D ]}|||dz   ddf         }||         }t          j        ||          }t          j        |          ||<   ||z
  }t          j        |          |
|<   t          j        ||j                  }dt          j        ||          z   }|t          j        ||j                  |z  z
  }|||z  |z                                  z   }||	|<   t          j        |          ||<   |
t          j        |          z  }||z
  }||d                             d
          }|t          j        |          z  }||dz
  d                                         }|dk    rd}n!|dk    rd}n|dk    rd}nt          d          |t          j        |          z  d|z  t          j        d||z
            z  t          j        |          z  z   t          j        dgdgg          z  } |
|	||||| fS )u  
    Calculate recursive ols with residuals and Cusum test statistic

    Parameters
    ----------
    res : RegressionResults
        Results from estimation of a regression model.
    skip : int, default None
        The number of observations to use for initial OLS, if None then skip is
        set equal to the number of regressors (columns in exog).
    lamda : float, default 0.0
        The weight for Ridge correction to initial (X'X)^{-1}.
    alpha : {0.90, 0.95, 0.99}, default 0.95
        Confidence level of test, currently only two values supported,
        used for confidence interval in cusum graph.
    order_by : array_like, default None
        Integer array specifying the order of the residuals. If not provided,
        the order of the residuals is not changed. If provided, must have
        the same number of observations as the endogenous variable.

    Returns
    -------
    rresid : ndarray
        The recursive ols residuals.
    rparams : ndarray
        The recursive ols parameter estimates.
    rypred : ndarray
        The recursive prediction of endogenous variable.
    rresid_standardized : ndarray
        The recursive residuals standardized so that N(0,sigma2) distributed,
        where sigma2 is the error variance.
    rresid_scaled : ndarray
        The recursive residuals normalize so that N(0,1) distributed.
    rcusum : ndarray
        The cumulative residuals for cusum test.
    rcusumci : ndarray
        The confidence interval for cusum test using a size of alpha.

    Notes
    -----
    It produces same recursive residuals as other version. This version updates
    the inverse of the X'X matrix and does not require matrix inversion during
    updating. looks efficient but no timing

    Confidence interval in Greene and Brown, Durbin and Evans is the same as
    in Ploberger after a little bit of algebra.

    References
    ----------
    jplv to check formulas, follows Harvey
    BigJudge 5.5.2b for formula for inverse(X'X) updating
    Greene section 7.5.2

    Brown, R. L., J. Durbin, and J. M. Evans. “Techniques for Testing the
    Constancy of Regression Relationships over Time.”
    Journal of the Royal Statistical Society. Series B (Methodological) 37,
    no. 2 (1975): 149-192.
    z!res a regression results instancerM  r   Tr#   r   )r   r   r   r&   Nz"The initial regressor matrix, x[:skip], issingular. You must use a value of
skip large enough to ensure that the first OLS estimator is well-defined.
)r   g?g333333?rJ  gtV?gGz?g}?5^I?z#alpha can only be 0.9, 0.95 or 0.99rQ   g      r  )r4   r   r5   r7   r8   r:   r   r&   r'   r   r   r(   r*   r9   re  rX   rY   r   ranger   rg  r[   r  r   r   r3  )!ro   rK  lamdarL  rM  rx   r=   rg   r   rparamsrresidrypredrvarrawx0err_msgy0xtxixtybetayipredr  xiyiresiditmpr   rresid_scalednrrsigma2rresid_standardizedrcusumr   rcusumcis!                                    r0   rN  rN  m  sJ   z c344 =;<<<	A	A(Jed 7 7 7H hKhK'KD%|frxu...GVbhtnn$FVbhtnn$Ffrx~~%G	
5D5B	yR  28A;.. !!!	
5D5B9==b))EBF5MM,AABBD
&r

C6$DGD1HVAdQhK&&FF4!8{V+F4!8BF1TAX;tQtax[0I0IJJJGD1H4 $ $qQwz]qT D!!Jv&&q	fJv&&q	 fT24  C bfS#%((2--sV|b(//111
Z^^

RWW---M
+C 455!%%1%--F'"'&//9 +2244F }}	$	$>??? BGCLL 1q529Qt+D+D#DrwH H $ 3%#(()HGV%8-H r2   c                    | j         j        }t          | j        d|j        d         df          }|j        \  }}|dz  }t
          j        ||dddf         z  ||                                z
  f         }|                    d          }||dddddf         |dddddf         z  	                    d          z  }|dddddf         |dddddf         z  	                    d          }	t          j
        t          j        t
          j                            |          |	                    }
t          j        g ddt          fd	t           fg
          }|
|fS )a  
    Test for model stability, breaks in parameters for ols, Hansen 1992

    Parameters
    ----------
    olsresults : RegressionResults
        Results from estimation of a regression model.

    Returns
    -------
    teststat : float
        Hansen's test statistic.
    crit : ndarray
        The critical values at alpha=0.95 for different nvars.

    Notes
    -----
    looks good in example, maybe not very powerful for small changes in
    parameters

    According to Greene, distribution of test statistics depends on nvar but
    not on nobs.

    Test statistic is verified against R:strucchange

    References
    ----------
    Greene section 7.5.1, notation follows Greene
    rW   r   r#   )r&   rQ   N))rQ   g)\(?)   gffffff?)   g      @)   gGz@rg   critr   )r7   r:   r   rW   r&   r'   r+   r   r   sumtracerX   r(   re  r3  r   r   )r  r=   rW   rg   r   resid2r   scorer  r   hcrit95s               r0   breaks_hansenr    se   < 	Az'QHHHE'KD%aZF	q5D>!FV[[]]$:;	<BIIaLLE111aaa:AAAtQQQJ/44Q777A	qqq!!!Tz	U111dAAA:.	.33A66A
	a((!,,--AXCCC$c]VUO<> > >F f9r2   c                    t          j        |                                           } t          |           }| dz                                  }|dk    r|||z
  z  |z  }|                                 t          j        |          z  }t          j        |                                          }g d}t          j
                            |          }|||fS )u  
    Cusum test for parameter stability based on ols residuals.

    Parameters
    ----------
    resid : ndarray
        An array of residuals from an OLS estimation.
    ddof : int
        The number of parameters in the OLS estimation, used as degrees
        of freedom correction for error variance.

    Returns
    -------
    sup_b : float
        The test statistic, maximum of absolute value of scaled cumulative OLS
        residuals.
    pval : float
        Probability of observing the data under the null hypothesis of no
        structural change, based on asymptotic distribution which is a Brownian
        Bridge
    crit: list
        The tabulated critical values, for alpha = 1%, 5% and 10%.

    Notes
    -----
    Tested against R:structchange.

    Not clear: Assumption 2 in Ploberger, Kramer assumes that exog x have
    asymptotically zero mean, x.mean(0) = [1, 0, 0, ..., 0]
    Is this really necessary? I do not see how it can affect the test statistic
    under the null. It does make a difference under the alternative.
    Also, the asymptotic distribution of test statistic depends on this.

    From examples it looks like there is little power for standard cusum if
    exog (other than constant) have mean zero.

    References
    ----------
    Ploberger, Werner, and Walter Kramer. “The Cusum Test with OLS Residuals.”
    Econometrica 60, no. 2 (March 1992): 271-285.
    rQ   r   ))r#   gGz?)r   g(\?)r   gQ?)r'   r   rg  r5  r  r   r[   r^   r   r   	kstwobignr]   )rW   r   rg   
nobssigma2r   sup_br  rn   s           r0   breaks_cusumolsresidr  +  s    T Ju##%%Eu::D1*!!##Jaxx4$;/$6
,,,AF1IIMMOOE---D
 ?e$$D$r2   )F)rz   N)NFr   NTF)NF)NFr   )T)NNNr  F)r!  r"  Frz   N)NN)rQ  NFN)N)NrW  rJ  N)r   )9r  statsmodels.compat.pandasr   collections.abcr   numpyr'   pandasr   scipyr   #statsmodels.regression.linear_modelr   r   statsmodels.stats._adnormr   r	   statsmodels.stats._lillieforsr
   r   r   r   statsmodels.tools.validationr   r   r   r   r   r   statsmodels.tsa.tsatoolsr   __all__rR   r1   r@   rB   r   r   r   r   r   r   r   r]  r^  r   r   r   r   rH  r!   r    r   r   rN  r  r  rL   r2   r0   <module>r     s'   0 6 5 5 5 5 5 $ $ $ $ $ $               M M M M M M M M C C C C C C C C                           , + + + + +L L LF F F0  &> > > > > > > >
D D D DN6 6 6 6r 9D$(TJ TJ TJ TJn FJ,1X$ X$ X$ X$v 7##e'tkde' e' e' e' $#e'P 7##$E $E $E $#$EN E""O' O' O' #"O'd
rz 
c 
d 
 
 
 
,O9 O9 O9 O9d7# 7# 7#t 9=7<q! q! q! q!h 5!!9>26p: p: p: "!p:f(+ (+ (+ (+V ?Dk k k k\. . . .bP P Pf u%%<@$(M M M &%M`* * *Z9 9 9 9 9 9r2   