
    M/Ph$                         d Z ddlZddlZddlmZ ddlmZ ddlm	Z	m
Z
mZmZmZmZmZmZ ddgZ G d	 de	          Z G d
 de          Z G d d          Z G d de          ZdS )aY  
Multivariate Conditional and Unconditional Kernel Density Estimation
with Mixed Data Types

References
----------
[1] Racine, J., Li, Q. Nonparametric econometrics: theory and practice.
    Princeton University Press. (2007)
[2] Racine, Jeff. "Nonparametric Econometrics: A Primer," Foundation
    and Trends in Econometrics: Vol 3: No 1, pp1-88. (2008)
    http://dx.doi.org/10.1561/0800000009
[3] Racine, J., Li, Q. "Nonparametric Estimation of Distributions
    with Categorical and Continuous Data." Working Paper. (2000)
[4] Racine, J. Li, Q. "Kernel Estimation of Multivariate Conditional
    Distributions Annals of Economics and Finance 5, 211-235 (2004)
[5] Liu, R., Yang, L. "Kernel estimation of multivariate
    cumulative distribution function."
    Journal of Nonparametric Statistics (2008)
[6] Li, R., Ju, G. "Nonparametric Estimation of Multivariate CDF
    with Categorical and Continuous Data." Working Paper
[7] Li, Q., Racine, J. "Cross-validated local linear nonparametric
    regression" Statistica Sinica 14(2004), pp. 485-512
[8] Racine, J.: "Consistent Significance Testing for Nonparametric
        Regression" Journal of Business & Economics Statistics
[9] Racine, J., Hart, J., Li, Q., "Testing the Significance of
        Categorical Predictor Variables in Nonparametric Regression
        Models", 2006, Econometric Reviews 25, 523-544

    N)optimize)
mquantiles   )
GenericKDEEstimatorSettingsgpkeLeaveOneOut_get_type_pos_adjust_shape_compute_min_std_IQRkernel_func	KernelRegKernelCensoredRegc                   h    e Zd ZdZ	 	 	 ddZd	 Zd
 Zd ZddZd Z	d Z
ddZddZd Zd Zd ZdS )r   a  
    Nonparametric kernel regression class.

    Calculates the conditional mean ``E[y|X]`` where ``y = g(X) + e``.
    Note that the "local constant" type of regression provided here is also
    known as Nadaraya-Watson kernel regression; "local linear" is an extension
    of that which suffers less from bias issues at the edge of the support. Note
    that specifying a custom kernel works only with "local linear" kernel
    regression. For example, a custom ``tricube`` kernel yields LOESS regression.

    Parameters
    ----------
    endog : array_like
        This is the dependent variable.
    exog : array_like
        The training data for the independent variable(s)
        Each element in the list is a separate variable
    var_type : str
        The type of the variables, one character per variable:

            - c: continuous
            - u: unordered (discrete)
            - o: ordered (discrete)

    reg_type : {'lc', 'll'}, optional
        Type of regression estimator. 'lc' means local constant and
        'll' local Linear estimator.  Default is 'll'
    bw : str or array_like, optional
        Either a user-specified bandwidth or the method for bandwidth
        selection. If a string, valid values are 'cv_ls' (least-squares
        cross-validation) and 'aic' (AIC Hurvich bandwidth estimation).
        Default is 'cv_ls'. User specified bandwidth must have as many
        entries as the number of variables.
    ckertype : str, optional
        The kernel used for the continuous variables.
    okertype : str, optional
        The kernel used for the ordered discrete variables.
    ukertype : str, optional
        The kernel used for the unordered discrete variables.
    defaults : EstimatorSettings instance, optional
        The default values for the efficient bandwidth estimation.

    Attributes
    ----------
    bw : array_like
        The bandwidth parameters.
    llcv_lsgaussian	wangryzinaitchisonaitkenNc
                    || _         || _        || _        || _        || _        || _        | j        t          v r| j        t          v r| j        t          v st          d          t          | j                   | _	        t          |d          | _        t          || j	                  | _        t          j        | j        | j        f          | _        t          j        | j                  d         | _        t%          | j        | j                  | _        |	t-                      n|	}	|                     |	           t1          |t2                    s;t          j        |          }t          |          | j	        k    rt          d          | j        s|                     |          | _        d S |                     |          | _        d S )NXuser specified kernel must be a supported kernel from statsmodels.nonparametric.kernels.r   r   lcr   z;bw must have the same dimension as the number of variables.)var_type	data_typereg_typeckertypeokertypeukertyper   
ValueErrorlenk_varsr   endogexognpcolumn_stackdatashapenobsdict_est_loc_constant_est_loc_linearestr   _set_defaults
isinstancestrasarray	efficient_compute_reg_bwbw_compute_efficient)
selfr#   r$   r   r   r4   r   r   r   defaultss
             k/var/www/html/test/jupyter/venv/lib/python3.11/site-packages/statsmodels/nonparametric/kernel_regression.py__init__zKernelReg.__init__]   s    !!    ,,+1M1MM[00 N O O O $-(("5!,,
!$44	OTZ$;<<	HTY''*	41d6JKKK*2*:$&&&8$$$"c"" 	9BB2ww$+%%  "8 9 9 9~ 	2**2..DGGG--b11DGGG    c                    t          |t                    sd| _        t          j        |          S || _        |dk    r| j        }n| j        }t          j        | j        d          }d|z  | j	        ddt          j
        | j        d          z   z  z  z  }| j        | j                 }t          j        |||fd	d	d
          }|S )Nzuser-specifiedr   r   axisg(\?g         r   g     @@)x0argsmaxitermaxfundisp)r/   r0   
_bw_methodr%   r1   cv_looaic_hurvichstdr$   r)   sizer-   r   r   fmin)r6   r4   resXh0funcbw_estimateds          r8   r3   zKernelReg._compute_reg_bw}   s    "c"" 	 .DO:b>>! !DOW}}k&tyq)))Atq27491+E+E+E'EFGHB 8DM*D#=4(14SqJ J JLr:   c           
         |j         \  }}t          |||| j        | j        | j        | j        d          t          |          z  }|ddt          j        f         }||z
  }t          j	        |j
        ||z            }	||z                      d          }t          j        |dz   |dz   f          }
|                                |
d<   ||
dddf<   ||
dddf<   |	|
ddddf<   ||z  }t          j        |dz   df          }|                                |d<   ||z
  |z                      d          |dddf<   t          j	        t          j                            |
          |          }|d         }|ddddf         }||fS )a  
        Local linear estimator of g(x) in the regression ``y = g(x) + e``.

        Parameters
        ----------
        bw : array_like
            Vector of bandwidth value(s).
        endog : 1D array_like
            The dependent variable.
        exog : 1D or 2D array_like
            The independent variable(s).
        data_predict : 1D array_like of length K, where K is the number of variables.
            The point at which the density is estimated.

        Returns
        -------
        D_x : array_like
            The value of the conditional mean at `data_predict`.

        Notes
        -----
        See p. 81 in [1] and p.38 in [2] for the formulas.
        Unlike other methods, this one requires that `data_predict` be 1D.
        Fr'   data_predictr   r   r   r   tosumNr   r<   r   r   r   )r(   r   r   r   r   r   floatr%   newaxisdotTsumemptylinalgpinv)r6   r4   r#   r$   rQ   r)   r"   kerM12M22M	ker_endogVmean_mfxmeanmfxs                   r8   r,   zKernelReg._est_loc_linear   s   2 zf2D| M M M M     
 #(++. !!!RZ- \!fSUC#I&&Syoo1o%%Hfqj&1*-..''))$!QRR%!""a%!""abb&	%K	Hfqj!_%%--//$L(I5:::BB!""a%6")..++Q//{qrr111uoSyr:   c           
         t          |||| j        | j        | j        | j        d          }t          j        |t          j        |                    }||z                      d          }|                    d          }||z  }|j        d         }	|t          |	          z  }
t          |||| j        dd          }|ddt
          j
        f         }||z                      d           t          |	          z  }|                    d           t          |	          z  }||
z  ||z  |
z  z
  }||z  ||z  z
  |dz  z  }||fS )	ad  
        Local constant estimator of g(x) in the regression
        y = g(x) + e

        Parameters
        ----------
        bw : array_like
            Array of bandwidth value(s).
        endog : 1D array_like
            The dependent variable.
        exog : 1D or 2D array_like
            The independent variable(s).
        data_predict : 1D or 2D array_like
            The point(s) at which the density is estimated.

        Returns
        -------
        G : ndarray
            The value of the conditional mean at `data_predict`.
        B_x : ndarray
            The marginal effects.
        FrP   r   r<   
d_gaussian)r'   rQ   r   r   rR   N   )r   r   r   r   r   r%   reshaper(   rX   rT   rU   )r6   r4   r#   r$   rQ   ker_xG_numerG_denomGr)   f_xker_xcd_mxd_fxB_xs                  r8   r+   zKernelReg._est_loc_constant   s`   . Rd"m"m"m"m " " " 
5"(5//225=%%1%--)))##gz!}d#bt,#}+!	# # # 2:&$$!$,,,uT{{:


"""U4[[0Sj1t8c>)~$.7A:>#vr:   c                 ^   t          j        | j        | j        f          }t          | j                  D ]H}t	          || j        | j        |ddf         | j        | j        | j        | j	        d          |dd|f<   I|
                    d          }||z  }t          | j        | j        | j	        | j        |t          d                                                    d         }t          j        || j        df          }| j        |z
  d	z  
                    d          t#          | j                  z  }dt          j        |          t#          | j                  z  z   dt          j        |          d	z   t#          | j                  z  z
  z  }t          j        |          |z   }	|	S )
a  
        Computes the AIC Hurvich criteria for the estimation of the bandwidth.

        Parameters
        ----------
        bw : str or array_like
            See the ``bw`` parameter of `KernelReg` for details.

        Returns
        -------
        aic : ndarray
            The AIC Hurvich criteria, one element for each variable.
        func : None
            Unused here, needed in signature because it's used in `cv_loo`.

        References
        ----------
        See ch.2 in [1] and p.35 in [2].
        NF)r'   rQ   r   r   r   r   rR   r   r<   r2   )r#   r$   r   r   r4   r7   r   rg   )r%   rY   r)   ranger   r$   r   r   r   r   rX   r   r#   r   r   fitrh   rT   tracelog)
r6   r4   rM   Hjdenomgxsigmafracaics
             r8   rF   zKernelReg.aic_hurvich   s   ( Hdi+,,ty!! 	( 	(A2DIDIacN$(MDM$(MDM!&( ( (AaaadGG
 1ITZdi$- $" 1E B B BD D DDGCEE!M ZTYN++*r/A%***22U495E5EEBHQKK%	"2"222RXa[[1_di(8(888:
 fUmmd"
r:   c           
      N   t          | j                  }t          | j                                                  }d}t	          |          D ]P\  }}t          |          } |||| | j        |ddf                    d         }	|| j        |         |	z
  dz  z  }Q|| j        z  S )af  
        The cross-validation function with leave-one-out estimator.

        Parameters
        ----------
        bw : array_like
            Vector of bandwidth values.
        func : callable function
            Returns the estimator of g(x).  Can be either ``_est_loc_constant``
            (local constant) or ``_est_loc_linear`` (local_linear).

        Returns
        -------
        L : float
            The value of the CV function.

        Notes
        -----
        Calculates the cross-validation least-squares function. This function
        is minimized by compute_bw to calculate the optimal value of `bw`.

        For details see p.35 in [2]

        .. math:: CV(h)=n^{-1}\sum_{i=1}^{n}(Y_{i}-g_{-i}(X_{i}))^{2}

        where :math:`g_{-i}(X_{i})` is the leave-one-out estimator of g(X)
        and :math:`h` is the vector of bandwidths
        r   N)r#   r$   rQ   rg   )r	   r$   r#   __iter__	enumeratenextr)   )
r6   r4   rM   LOO_XLOO_YLiiX_not_iYrl   s
             r8   rE   zKernelReg.cv_loo*  s    : DI&&DJ''0022$U++ 	+ 	+KBUARqx#'9RU#3"35 5 5568A$*R.1$**AA 49}r:   c                 L   t          j        | j                  }|                                 d         }t          j        |          }||z
  ||z
  z                                  dz  }||z
  dz                      d          ||z
  dz                      d          z  }||z  S )a  
        Returns the R-Squared for the nonparametric regression.

        Notes
        -----
        For more details see p.45 in [2]
        The R-Squared is calculated by:

        .. math:: R^{2}=\frac{\left[\sum_{i=1}^{n}
            (Y_{i}-\bar{y})(\hat{Y_{i}}-\bar{y}\right]^{2}}{\sum_{i=1}^{n}
            (Y_{i}-\bar{y})^{2}\sum_{i=1}^{n}(\hat{Y_{i}}-\bar{y})^{2}},

        where :math:`\hat{Y_{i}}` is the mean calculated in `fit` at the exog
        points.
        r   rg   r<   )r%   squeezer#   ru   rc   rX   )r6   r   YhatY_barR2_numerR2_denoms         r8   	r_squaredzKernelReg.r_squaredS  s      Jtz""xxzz!}%iD5L166881<YN''Q'//E\A%***223(""r:   c           
         | j         | j                 }|| j        }nt          || j                  }t          j        |          d         }t          j        |f          }t          j        || j        f          }t          |          D ]j} || j	        | j
        | j        ||ddf                   }t          j        |d                   ||<   t          j        |d                   }|||ddf<   k||fS )a  
        Returns the mean and marginal effects at the `data_predict` points.

        Parameters
        ----------
        data_predict : array_like, optional
            Points at which to return the mean and marginal effects.  If not
            given, ``data_predict == exog``.

        Returns
        -------
        mean : ndarray
            The regression result for the mean (i.e. the actual curve).
        mfx : ndarray
            The marginal effects, i.e. the partial derivatives of the mean.
        Nr   rQ   r   )r-   r   r$   r   r"   r%   r(   rY   rt   r4   r#   r   	r6   rQ   rM   N_data_predictrc   rd   irb   mfx_cs	            r8   ru   zKernelReg.fitk  s    " x&9LL(t{CCL,//2x)**h455~&& 	 	AtDGTZ)5ad);= = =Hj!--DGJx{++EC111IISyr:   2      Fc                 l   t          j        |          }t          | j                  \  }}}t          j        ||                   rWt          j        ||                   st          j        ||                   rt          d          t          | ||||          }nt          | ||          }|j        S )a  
        Significance test for the variables in the regression.

        Parameters
        ----------
        var_pos : sequence
            The position of the variable in exog to be tested.

        Returns
        -------
        sig : str
            The level of significance:

                - `*` : at 90% confidence level
                - `**` : at 95% confidence level
                - `***` : at 99* confidence level
                - "Not Significant" : if not significant
        z3Discrete variable in hypothesis. Must be continuous)	r%   r1   r
   r   anyr    TestRegCoefCTestRegCoefDsig)	r6   var_posnboot
nested_respivotix_contix_ordix_unordSigs	            r8   sig_testzKernelReg.sig_test  s    & *W%%$1$-$@$@!6''"## 	5vfWo&& X"&'1B*C*C X !VWWWtWeZGGCCtWe44Cwr:   c                     d}|dt          | j                  z   dz   z  }|dt          | j                  z   dz   z  }|d| j        z   dz   z  }|d| j        z   dz   z  }|d| j        z   dz   z  }|S ) Provide something sane to print.zKernelReg instance
Number of variables: k_vars = 
zNumber of samples:   N = Variable types:      BW selection method: Estimator type: r0   r"   r)   r   rD   r   r6   rprs     r8   __repr__zKernelReg.__repr__  s    $/#dk2B2BBTII*S^^;dBB&6==&84??!DM1D88
r:   c                 6    d}| j         | j        | j        f}||fS )z@Helper method to be able to pass needed vars to _compute_subset.r   )r   r"   r   )r6   
class_type
class_varss      r8   _get_class_vars_typezKernelReg._get_class_vars_type  s$     
mT[$-@
:%%r:   c                 <    |ddddf         }t          |          S )a^  
        Computes the measure of dispersion.

        The minimum of the standard deviation and interquartile range / 1.349

        References
        ----------
        See the user guide for the np package in R.
        In the notes on bwscaling option in npreg, npudens, npcdens there is
        a discussion on the measure of dispersion
        Nr   )r   )r6   r'   s     r8   _compute_dispersionzKernelReg._compute_dispersion  s'     AAAqrrE{#D)))r:   )r   r   r   r   r   NN)r   r   F)__name__
__module____qualname____doc__r9   r3   r,   r+   rF   rE   r   ru   r   r   r   r    r:   r8   r   r   -   s        . .^ AH/:6:2 2 2 2@     *: : :x/ / /b) ) ) )V' ' 'R# # #0! ! ! !F   >  & & &* * * * *r:   c                   D    e Zd ZdZ	 	 	 	 	 ddZd	 Zd
 Zd Zd ZddZ	dS )r   a,  
    Nonparametric censored regression.

    Calculates the conditional mean ``E[y|X]`` where ``y = g(X) + e``,
    where y is left-censored.  Left censored variable Y is defined as
    ``Y = min {Y', L}`` where ``L`` is the value at which ``Y`` is censored
    and ``Y'`` is the true value of the variable.

    Parameters
    ----------
    endog : list with one element which is array_like
        This is the dependent variable.
    exog : list
        The training data for the independent variable(s)
        Each element in the list is a separate variable
    dep_type : str
        The type of the dependent variable(s)
        c: Continuous
        u: Unordered (Discrete)
        o: Ordered (Discrete)
    reg_type : str
        Type of regression estimator
        lc: Local Constant Estimator
        ll: Local Linear Estimator
    bw : array_like
        Either a user-specified bandwidth or
        the method for bandwidth selection.
        cv_ls: cross-validation least squares
        aic: AIC Hurvich Estimator
    ckertype : str, optional
        The kernel used for the continuous variables.
    okertype : str, optional
        The kernel used for the ordered discrete variables.
    ukertype : str, optional
        The kernel used for the unordered discrete variables.
    censor_val : float
        Value at which the dependent variable is censored
    defaults : EstimatorSettings instance, optional
        The default values for the efficient bandwidth estimation

    Attributes
    ----------
    bw : array_like
        The bandwidth parameters
    r   r   aitchison_aitken_regwangryzin_regr   Nc                 ~   || _         || _        || _        || _        || _        || _        | j        t          v r| j        t          v r| j        t          v st          d          t          | j                   | _	        t          |d          | _        t          || j	                  | _        t          j        | j        | j        f          | _        t          j        | j                  d         | _        t%          | j        | j                  | _        |
t-                      n|
}
|                     |
           |	| _        | j        |                     |	           n t          j        | j        df          | _        | j        s|                     |          | _        d S |                     |          | _        d S )Nr   r   r   r   ) r   r   r   r   r   r   r   r    r!   r"   r   r#   r$   r%   r&   r'   r(   r)   r*   r+   r,   r-   r   r.   
censor_valcensoredonesW_inr2   r3   r4   r5   )r6   r#   r$   r   r   r4   r   r   r   r   r7   s              r8   r9   zKernelCensoredReg.__init__  s   
 !!    ,,+1M1MM[00 N O O O $-(("5!,,
!$44	OTZ$;<<	HTY''*	41d6JKKK*2*:$&&&8$$$$?&MM*%%%%A//DI~ 	2**2..DGGG--b11DGGGr:   c                    | j         |k    dz  | _        t          j        t          j        | j                             }|| _        t          j        |j        t                    | _	        t          j
        t          |                    | j	        |<   t          j        | j         |                   | _         t          | j         d          | _         t          j        | j        |                   | _        t          j        | j        |                   | _        t          j        | j        df          | _        t#          d| j        dz             D ]}d}t#          d|          D ];}|| j        |z
  t%          | j                  |z
  dz   z  | j        |dz
           z  z  }<|| j        |dz
           z  t%          | j                  |z
  dz   z  | j        |dz
  df<   d S )N      ?r   r   )r#   dr%   argsortr   sortixzerosr(   int
sortix_revaranger!   r   r$   rY   r)   r   rt   rT   )r6   r   ixr   Pry   s         r8   r   zKernelCensoredReg.censored  s   *
*b0Z
4:..//(28S11 iB00Z
2//
"4:q11
Jty}--	DF2J''Hdi^,,	q$)a-(( 	M 	MAA1a[[ K Kty1}uTY'7'7'9!';<tvac{JJ 46!A#;%	2B2BQ2F2JLDIac!e		M 	Mr:   c                     d}|dt          | j                  z   dz   z  }|dt          | j                  z   dz   z  }|d| j        z   dz   z  }|d| j        z   dz   z  }|d| j        z   dz   z  }|S )r   zKernelCensoredReg instance
r   r   zNumber of samples:   nobs = r   r   r   r   r   s     r8   r   zKernelCensoredReg.__repr__0  s    ,/#dk2B2BBTII-DI>EE&6==&84??!DM1D88
r:   c           
         |j         \  }}t          |||| j        | j        | j        | j        d          }||ddt          j        f         z  }||z
  }	t          j        |	j	        |	|z            }
|	|z  
                    d          }	t          j        |dz   |dz   f          }|
                                |d<   |	|dddf<   |	|dddf<   |
|ddddf<   ||z  }t          j        |dz   df          }|
                                |d<   ||z
  |z  
                    d          |dddf<   t          j        t          j                            |          |          }|d         }|ddddf         }||fS )a  
        Local linear estimator of g(x) in the regression ``y = g(x) + e``.

        Parameters
        ----------
        bw : array_like
            Vector of bandwidth value(s)
        endog : 1D array_like
            The dependent variable
        exog : 1D or 2D array_like
            The independent variable(s)
        data_predict : 1D array_like of length K, where K is
            the number of variables. The point at which
            the density is estimated

        Returns
        -------
        D_x : array_like
            The value of the conditional mean at data_predict

        Notes
        -----
        See p. 81 in [1] and p.38 in [2] for the formulas
        Unlike other methods, this one requires that data_predict be 1D
        FrP   Nr   r<   r   rS   )r(   r   r   r   r   r   r%   rU   rV   rW   rX   rY   rZ   r[   )r6   r4   r#   r$   rQ   Wr)   r"   r\   r]   r^   r_   r`   ra   rb   rc   rd   s                    r8   r,   z!KernelCensoredReg._est_loc_linear:  s   4 zf2D| M M M M	8 8 8 #aaam$$\!fSUC#I&&Syoo1o%%Hfqj&1*-..''))$!QRR%!""a%!""abb&	%K	Hfqj!_%%--//$L(I5:::BB!""a%6")..++Q//{qrr111uoSyr:   c           
         t          | j                  }t          | j                                                  }t          | j                                                  }d}t          |          D ]`\  }}t          |          }	t          |          }
 |||	| | j        |ddf          |
          d         }|| j        |         |z
  dz  z  }a|| j        z  S )a{  
        The cross-validation function with leave-one-out
        estimator

        Parameters
        ----------
        bw : array_like
            Vector of bandwidth values
        func : callable function
            Returns the estimator of g(x).
            Can be either ``_est_loc_constant`` (local constant) or
            ``_est_loc_linear`` (local_linear).

        Returns
        -------
        L : float
            The value of the CV function

        Notes
        -----
        Calculates the cross-validation least-squares
        function. This function is minimized by compute_bw
        to calculate the optimal value of bw

        For details see p.35 in [2]

        .. math:: CV(h)=n^{-1}\sum_{i=1}^{n}(Y_{i}-g_{-i}(X_{i}))^{2}

        where :math:`g_{-i}(X_{i})` is the leave-one-out estimator of g(X)
        and :math:`h` is the vector of bandwidths
        r   N)r#   r$   rQ   r   rg   )r	   r$   r#   r   r   r   r   r)   )r6   r4   rM   r   r   LOO_Wr   r   r   r   wrl   s               r8   rE   zKernelCensoredReg.cv_loot  s    @ DI&&DJ''0022DI&&//11$U++ 	+ 	+KBUAUARqx#'9RU#3"3q: : ::;=A$*R.1$**AA 49}r:   c           
          | j         | j                 }|| j        }nt          || j                  }t          j        |          d         }t          j        |f          }t          j        || j        f          }t          |          D ]p} || j	        | j
        | j        ||ddf         | j                  }t          j        |d                   ||<   t          j        |d                   }|||ddf<   q||fS )zJ
        Returns the marginal effects at the data_predict points.
        Nr   )rQ   r   r   )r-   r   r$   r   r"   r%   r(   rY   rt   r4   r#   r   r   r   s	            r8   ru   zKernelCensoredReg.fit  s    x&9LL(t{CCL,//2x)**h455~&& 	 	AtDGTZ)5ad);"i) ) )H j!--DGJx{++EC111IISyr:   )r   r   r   r   r   Nr   )
r   r   r   r   r9   r   r   r,   rE   ru   r   r:   r8   r   r     s        , ,Z <C$0)(,	!2 !2 !2 !2FM M M$  7 7 7t, , ,\     r:   c                   <    e Zd ZdZ	 	 ddZd Zd Zd Zd Zd	 Z	d
S )r   a  
    Significance test for continuous variables in a nonparametric regression.

    The null hypothesis is ``dE(Y|X)/dX_not_i = 0``, the alternative hypothesis
    is ``dE(Y|X)/dX_not_i != 0``.

    Parameters
    ----------
    model : KernelReg instance
        This is the nonparametric regression model whose elements
        are tested for significance.
    test_vars : tuple, list of integers, array_like
        index of position of the continuous variables to be tested
        for significance. E.g. (1,3,5) jointly tests variables at
        position 1,3 and 5 for significance.
    nboot : int
        Number of bootstrap samples used to determine the distribution
        of the test statistic in a finite sample. Default is 400
    nested_res : int
        Number of nested resamples used to calculate lambda.
        Must enable the pivot option
    pivot : bool
        Pivot the test statistic by dividing by its standard error
        Significantly increases computational time. But pivot statistics
        have more desirable properties
        (See references)

    Attributes
    ----------
    sig : str
        The significance level of the variable(s) tested
        "Not Significant": Not significant at the 90% confidence level
                            Fails to reject the null
        "*": Significant at the 90% confidence level
        "**": Significant at the 95% confidence level
        "***": Significant at the 99% confidence level

    Notes
    -----
    This class allows testing of joint hypothesis as long as all variables
    are continuous.

    References
    ----------
    Racine, J.: "Consistent Significance Testing for Nonparametric Regression"
    Journal of Business & Economics Statistics.

    Chapter 12 in [1].
      Fc                 B   || _         || _        || _        || _        |j        | _        |j        | _        t          | j                  | _        |j        | _        |j	        | _	        |j
        |j                 | _        || _        || _        |                                  d S r   )r   nres	test_varsmodelr4   r   r!   r"   r#   r$   r-   r   r{   r   run)r6   r   r   r   r   r   s         r8   r9   zTestRegCoefC.__init__  s    
	"
($-(([
J	)EN+"





r:   c                     |                      | j        | j                  | _        |                                 | _        d S r   )_compute_test_statr#   r$   	test_stat_compute_sigr   )r6   s    r8   r   zTestRegCoefC.run  s4    00TYGG$$&&r:   c                     |                      ||          }|}| j        r(|                     ||          }|t          |          z  }|S )zA
        Computes the test statistic.  See p.371 in [8].
        )_compute_lambdar   _compute_se_lambdarT   )r6   r   rK   lamtse_lams         r8   r   zTestRegCoefC._compute_test_stat  sR     ""1a((: 	$,,Q22FeFmm#Ar:   c           
         t          j        |          d         }t          |d          }t          || j                  }t	          ||| j        | j        j        | j        t          d                    
                                d         }|dd| j        f         }t          j        ||t          | j                  f          }d}||z  dz                                  t          |          z  }|S )	z;Computes only lambda -- the main part of the test statisticr   r   Frs   r7   Nr   rg   )r%   r(   r   r"   r   r   r   r   r4   r   ru   r   rh   r!   rX   rT   )r6   r   rK   nbfctr   s          r8   r   zTestRegCoefC._compute_lambda  s    HQKKN!Q!T[))aDM4:+>#4u#E#E#EG G GGJsuuQP aaa Jq1c$.11233CA~""$$uQxx/
r:   c                 l   t          j        |          d         }t          j        | j        f          }t	          | j                  D ]U}t           j                            d||df          }||df         }||ddf         }|                     ||          ||<   Vt          j        |          }	|	S )z
        Calculates the SE of lambda by nested resampling
        Used to pivot the statistic.
        Bootstrapping works better with estimating pivotal statistics
        but slows down computation significantly.
        r   r(   r   rH   N)	r%   r(   rY   r   rt   randomrandintr   rG   )
r6   r   rK   r   r   r   indY1X1	se_lambdas
             r8   r   zTestRegCoefC._compute_se_lambda  s     HQKKNhdi\***ty!! 	2 	2A)##Aq1v#66C36B36B))"b11CFFF3KK	r:   c           
         t          j        | j        f          }| j        }t	          j        | j                  }t          j        |          d         }t          j        |dd| j	        f         d          |dd| j	        f<   t          ||| j        | j        j        | j        t          d                                                    d         }t          j        ||df          }||z
  }|t          j        |          z
  }t%          | j                  D ]S}t           j                            d||df	          }||df         }	||	z   }
|                     |
| j                  ||<   T|| _        d
}| j        t1          |d          k    rd}| j        t1          |d          k    rd}| j        t1          |d          k    rd}|S )a'  
        Computes the significance value for the variable(s) tested.

        The empirical distribution of the test statistic is obtained through
        bootstrapping the sample.  The null hypothesis is rejected if the test
        statistic is larger than the 90, 95, 99 percentiles.
        r   r   Nr<   Frs   r   r   r   Not Significant?*ffffff?**Gz?***)r%   rY   r   r#   copydeepcopyr$   r(   rc   r   r   r   r   r   r4   r   ru   rh   rt   r   r   r   t_distr   r   )r6   r   r   rK   r   r_   er   r   e_bootY_bootr   s               r8   r   zTestRegCoefC._compute_sig0  s    ///JM$)$$HQKKN!wqDN):';!DDD!!!T^
aDM4:+>05AAAC C CCF355LJq1a&!!E

Ntz"" 	C 	CA)##Aq1v#66CsAvYFZF//	BBF1II>Jvs3333C>Jvt4444C>Jvt4444C
r:   N)r   r   F)
r   r   r   r   r9   r   r   r   r   r   r   r:   r8   r   r     s        0 0h @C    ' ' '
 
 
    $# # # # #r:   r   c                   $    e Zd ZdZd Zd Zd ZdS )r   a  
    Significance test for the categorical variables in a nonparametric
    regression.

    Parameters
    ----------
    model : Instance of KernelReg class
        This is the nonparametric regression model whose elements
        are tested for significance.
    test_vars : tuple, list of one element
        index of position of the discrete variable to be tested
        for significance. E.g. (3) tests variable at
        position 3 for significance.
    nboot : int
        Number of bootstrap samples used to determine the distribution
        of the test statistic in a finite sample. Default is 400

    Attributes
    ----------
    sig : str
        The significance level of the variable(s) tested
        "Not Significant": Not significant at the 90% confidence level
                            Fails to reject the null
        "*": Significant at the 90% confidence level
        "**": Significant at the 95% confidence level
        "***": Significant at the 99% confidence level

    Notes
    -----
    This class currently does not allow joint hypothesis.
    Only one variable can be tested at a time

    References
    ----------
    See [9] and chapter 12 in [1].
    c           
         t          j        t          j        | j        dd| j        f                             }t          j        |          d         }t          ||| j        | j        j	        | j
        t          d                    }t          j        |          }d|dd| j        f<   |                    |          d         }t          j        ||df          }t          j        |df          }|dd         D ]N}	|	|dd| j        f<   |                    |          d         }
t          j        |
|df          }
||
|z
  dz  z  }O|                    d	          t%          |          z  }|S )
zComputes the test statisticNr   Frs   r   r   r   rg   r<   )r%   sortuniquer$   r   r(   r   r   r   r   r4   r   r   r   ru   rh   r   rX   rT   )r6   r   rK   dom_xr   r   r   m0zvecr   m1avgs               r8   r   zTestRegCoefD._compute_test_stat|  sl    	$)AAAt~,=">??@@HQKKN!Qtz/BDG%6%G%G%GI I I]1 !111dnYYBY''*ZQF##xAqrr 	# 	#A$%Bqqq$. !++A.BBA''BR"WN"DDhhAhq)
r:   c                    |                                  }| j        }| j        }t          j        |          d         }||z
  }|t          j        |          z
  }d}d}||z  }||z  }	|dz  }
t          j        | j        df          }t          | j                  D ]i}t          j
        |	          }t          j                            dd|df          }||
k     }||         ||<   ||z   }|                     ||          ||<   jd}| j        t          |d          k    rd	}| j        t          |d
          k    rd}| j        t          |d          k    rd}|S )z8Calculates the significance level of the variable testedr   gP/7gw?gw@r   r   r   r   r   r   r   r   r   )_est_cond_meanr#   r$   r%   r(   rc   rY   r   rt   r   r   r   uniformr   r   r   )r6   mr   rK   r   ufct1fct2u1u2rI_distry   u_bootprobr   r  r   s                     r8   r   zTestRegCoefD._compute_sig  si    !!JIHQKKNE

N  AXAXH4:a.))tz"" 	; 	;A]2&&F9$$Qq!A$77D(CS'F3KZF//::F1II>Jvs3333C>Jvt4444C>Jvt4444C
r:   c                    t          j        t          j        | j        dd| j        f                             | _        t          j        | j                  }d}| j        D ]4}||dd| j        f<   || j        	                    |          d         z  }5|t          t          | j                            z  }t          j        |t          j        | j                  d         df          }|S )z_
        Calculates the expected conditional mean
        m(X, Z=l) for all possible l
        Nr   r   r   )r%   r  r  r$   r   r  r   r   r   ru   rT   r!   rh   r(   )r6   rK   r  r   s       r8   r  zTestRegCoefD._est_cond_mean  s    
 WRYtyDN1B'CDDEE
M$)$$
 	5 	5A$%Aaaa q11!44AAc$*oo&&&Jq28DI..q11566r:   N)r   r   r   r   r   r   r  r   r:   r8   r   r   V  sM        # #J  .     D    r:   r   )r   r   numpyr%   scipyr   scipy.stats.mstatsr   _kernel_baser   r   r   r	   r
   r   r   r   __all__r   r   r   r   r   r:   r8   <module>r      s   >            ) ) ) ) ) )Q Q Q Q Q Q Q Q Q Q Q Q Q Q Q Q Q Q Q Q +
,]* ]* ]* ]* ]*
 ]* ]* ]*@j j j j j	 j j jZY Y Y Y Y Y Y Yxm m m m m< m m m m mr:   