
    M/Ph7                         d Z ddlZddlmZ ddlmZ ddlmZm	Z	 ddl
mZmZmZmZ g dZ G d d	          Z G d
 de	          Z G d de	          ZdS )aY  
Multivariate Conditional and Unconditional Kernel Density Estimation
with Mixed Data Types

References
----------
[1] Racine, J., Li, Q. Nonparametric econometrics: theory and practice.
    Princeton University Press. (2007)
[2] Racine, Jeff. "Nonparametric Econometrics: A Primer," Foundation
    and Trends in Econometrics: Vol 3: No 1, pp1-88. (2008)
    http://dx.doi.org/10.1561/0800000009
[3] Racine, J., Li, Q. "Nonparametric Estimation of Distributions
    with Categorical and Continuous Data." Working Paper. (2000)
[4] Racine, J. Li, Q. "Kernel Estimation of Multivariate Conditional
    Distributions Annals of Economics and Finance 5, 211-235 (2004)
[5] Liu, R., Yang, L. "Kernel estimation of multivariate
    cumulative distribution function."
    Journal of Nonparametric Statistics (2008)
[6] Li, R., Ju, G. "Nonparametric Estimation of Multivariate CDF
    with Categorical and Continuous Data." Working Paper
[7] Li, Q., Racine, J. "Cross-validated local linear nonparametric
    regression" Statistica Sinica 14(2004), pp. 485-512
[8] Racine, J.: "Consistent Significance Testing for Nonparametric
        Regression" Journal of Business & Economics Statistics
[9] Racine, J., Hart, J., Li, Q., "Testing the Significance of
        Categorical Predictor Variables in Nonparametric Regression
        Models", 2006, Econometric Reviews 25, 523-544

    N)optimize)
mquantiles)KDEMultivariate	KernelReg)gpkeLeaveOneOut_get_type_pos_adjust_shape)SingleIndexModel
SemiLinear	TestFFormc                   &    e Zd ZdZddZd Zd ZdS )r   a]  
    Nonparametric test for functional form.

    Parameters
    ----------
    endog : list
        Dependent variable (training set)
    exog : list of array_like objects
        The independent (right-hand-side) variables
    bw : array_like, str
        Bandwidths for exog or specify method for bandwidth selection
    fform : function
        The functional form ``y = g(b, x)`` to be tested. Takes as inputs
        the RHS variables `exog` and the coefficients ``b`` (betas)
        and returns a fitted ``y_hat``.
    var_type : str
        The type of the independent `exog` variables:

            - c: continuous
            - o: ordered
            - u: unordered

    estimator : function
        Must return the estimated coefficients b (betas). Takes as inputs
        ``(endog, exog)``.  E.g. least square estimator::

            lambda (x,y): np.dot(np.pinv(np.dot(x.T, x)), np.dot(x.T, y))

    References
    ----------
    See Racine, J.: "Consistent Significance Testing for Nonparametric
    Regression" Journal of Business & Economics Statistics.

    See chapter 12 in [1]  pp. 355-357.
    d   c                     || _         || _        || _        || _        || _        || _        t          |||          j        | _        |                                 | _	        d S )N)bwvar_type)
endogexogr   fform	estimatornbootr   r   _compute_sigsig)selfr   r   r   r   r   r   r   s           o/var/www/html/test/jupyter/venv/lib/python3.11/site-packages/statsmodels/sandbox/nonparametric/kernel_extras.py__init__zTestFForm.__init__P   s\    
	 
"
!$2AAAD$$&&    c                    | j         }| j        }|                     ||          }|                     ||          }t	          j        |          d         }||z
  }|t	          j        |          z
  }|                     |          | _        t	          j	        d          }d|z
  dz  }d|z   dz  }	||z  }
|	|z  }|	|z  }t	          j
        | j        df          }t          | j                  D ]}|                                }t          j                            dd|f          }||k     }|
|         ||<   ||z   }|                     ||          }|                     ||          }||z
  }|                     |          ||<   || _        d}| j        t#          |d          k    rd}| j        t#          |d	          k    rd
}| j        t#          |d          k    rd}|S )Nr   g      @   g       @sizezNot Significantg?*gffffff?z**gGz?z***)r   r   r   r   npshapemean_compute_test_stat	test_statsqrtemptyr   rangecopyrandomuniformboots_resultsr   )r   YXbmnresidsqrt5fct1fct2u1u2rI_distju_bootprobindY_bootb_hatm_hat
u_boot_hatr   s                          r   r   zTestFForm._compute_sigZ   s   JINN1a  JJq!HQKKNA&0077E	RE	RE\E\5L4:a.))tz"" 
	< 
	<AWWYYF9$$Qq!$66D(CS'F3KZFNN61--EJJq%((E%J//
;;F1II#>Jvs3333C>Jvt4444C>Jvt4444C
r   c           	         t          j        |          d         }t          | j                  }t          |d d d f                                                   }d}d}t          |          D ]\  }}t          |          }	t          j        |	          }	t          | j	        | | j        |d d f          | j
        d          }
||         |	z  |
z  }|	j        |
j        k    sJ ||                                z  }||dz                                  z  }t          j        |          dk    sJ t          j        |          dk    sJ |d||dz
  z  z  z  }t          | j
                  d         }| j	        |                                         }|d|z  ||dz
  z  z  z  }||z  t          j        ||z            z  }|S )Nr   F)datadata_predictr   tosum   r   g      ?)r#   r$   r   r   __iter__	enumeratenextsqueezer   r   r   sumr!   r	   prodr(   )r   ur3   XLOOuLOOivalS2iX_not_iu_jKf_iix_conthpTs                  r   r&   zTestFForm._compute_test_stat   s   HQKKN49%%1QQQtV9%%..00#D// 	$ 	$JAwt**C*S//CTWG849QT?:J"m5: : :AQ4#:>C9''''CGGIID36,,.. B74==A%%%%72;;!#####a1q5k""..q1WW""$$
a"fQU$$HrwrBw'''r   N)r   )__name__
__module____qualname____doc__r   r   r&    r   r   r   r   ,   sR        " "F' ' ' '$ $ $L    r   r   c                   2    e Zd ZdZd Zd Zd ZddZd ZdS )	r   a  
    Single index semiparametric model ``y = g(X * b) + e``.

    Parameters
    ----------
    endog : array_like
        The dependent variable
    exog : array_like
        The independent variable(s)
    var_type : str
        The type of variables in X:

            - c: continuous
            - o: ordered
            - u: unordered

    Attributes
    ----------
    b : array_like
        The linear coefficients b (betas)
    bw : array_like
        Bandwidths

    Methods
    -------
    fit(): Computes the fitted values ``E[Y|X] = g(X * b)``
           and the marginal effects ``dY/dX``.

    References
    ----------
    See chapter on semiparametric models in [1]

    Notes
    -----
    This model resembles the binary choice models. The user knows
    that X and b interact linearly, but ``g(X * b)`` is unknown.
    In the parametric binary choice models the user usually assumes
    some distribution of g() such as normal or logistic.
    c                    || _         t          |          | _        | j         d         | _         t          |d          | _        t          || j                  | _        t          j        | j                  d         | _        | j         | _	        d| _
        d| _        d| _        | j        | _        |                                 \  | _        | _        d S )Nr   r   gaussian	wangryzinaitchisonaitken)r   lenrW   r
   r   r   r#   r$   nobs	data_typeckertypeokertypeukertype_est_loc_linearfunc	_est_b_bwr1   r   )r   r   r   r   s       r   r   zSingleIndexModel.__init__   s     Xa("5!,,
!$//	HTY''*	"#)(	..**r   c                     t           j                            | j        dz   f          }t	          j        | j        |d          }|d| j                 }|| j        d          }|                     |          }||fS )Nr   r    r   disp)r#   r,   r-   rW   r   fmincv_loo_set_bw_boundsr   params0b_bwr1   r   s        r   rn   zSingleIndexModel._est_b_bw   st    )##$&1*#88}T[':::46N$&'']  $$"ur   c                 0   t          j        |          }|d| j                 }|| j        d          }t          | j                  }t          | j                                                  }d}t          |          D ]\  }}t          |          }	| 	                    ||	t          j
        ||          d d d f          t          j
        | j        ||dz   d d f         |                     d         }
|| j        |         |
z
  dz  z  }|| j        z  S )Nr   r   r   r   rF   rH   )r#   asarrayrW   r   r   r   rI   rJ   rK   rm   dotrg   )r   paramsr1   r   LOO_XLOO_YLrT   rU   r/   Gs              r   rs   zSingleIndexModel.cv_loo   s   F##1tv:DFGG_DI&&DJ''0022#E** 	* 	*JAwUA		"ARVGQ-?-?$-G,G(*ty1Q3/BA(F(F'F  H HHIKA $*Q-!#))AA 49}r   Nc                 >   || j         }nt          || j                  }t          j        |          d         }t          j        |f          }t          j        || j        f          }t          |          D ]}|                     | j        | j	        t          j
        | j         | j                  d d d f         t          j
        |||dz   d d f         | j                            }|d         ||<   t          j        |d                   }|||d d f<   ||fS )Nr   r   rF   )r   r
   rW   r#   r$   r)   r*   rm   r   r   r{   r1   rL   )r   rF   N_data_predictr%   mfxrT   mean_mfxmfx_cs           r   fitzSingleIndexModel.fit   s   9LL(tv>>L,//2x)**h/00~&& 	 	Ayy$*!#	46!:!:111T6!B.0f\!AaC%(5KDF.S.S ! U UH qkDGJx{++EC111IISyr   c                     d}|dt          | j                  z   dz   z  }|dt          | j                  z   dz   z  }|d| j        z   dz   z  }|dz  }|dz  }|S ) Provide something sane to print.zSingle Index Model 
Number of variables: K = 
zNumber of samples:   nobs = Variable types:      BW selection method: cv_ls
Estimator type: local constant
strrW   rg   r   r   reprs     r   __repr__zSingleIndexModel.__repr__  ss    &+c$&kk9D@@.TY?$FF'$-7$>>3377r   )N	r\   r]   r^   r_   r   rn   rs   r   r   r`   r   r   r   r      so        & &N+ + +    (   &    r   r   c                   2    e Zd ZdZd Zd Zd ZddZd ZdS )	r   a}  
    Semiparametric partially linear model, ``Y = Xb + g(Z) + e``.

    Parameters
    ----------
    endog : array_like
        The dependent variable
    exog : array_like
        The linear component in the regression
    exog_nonparametric : array_like
        The nonparametric component in the regression
    var_type : str
        The type of the variables in the nonparametric component;

            - c: continuous
            - o: ordered
            - u: unordered

    k_linear : int
        The number of variables that comprise the linear component.

    Attributes
    ----------
    bw : array_like
        Bandwidths for the nonparametric component exog_nonparametric
    b : array_like
        Coefficients in the linear component
    nobs : int
        The number of observations.
    k_linear : int
        The number of variables that comprise the linear component.

    Methods
    -------
    fit
        Returns the fitted mean and marginal effects dy/dz

    Notes
    -----
    This model uses only the local constant regression estimator

    References
    ----------
    See chapter on Semiparametric Models in [1]
    c                    t          |d          | _        t          ||          | _        t          |          | _        t          || j                  | _        || _        t          j        | j                  d         | _	        || _
        | j
        | _        d| _        d| _        d| _        | j        | _        |                                 \  | _        | _        d S )Nr   r   rc   rd   re   )r
   r   r   rf   rW   exog_nonparametrick_linearr#   r$   rg   r   rh   ri   rj   rk   rl   rm   rn   r1   r   )r   r   r   r   r   r   s         r   r   zSemiLinear.__init__;  s    "5!,,
!$11	X"/0BDF"K"K HTY''*	 "#)(	..**r   c                     t           j                            | j        | j        z   f          }t          j        | j        |d          }|d| j                 }|| j        d         }||fS )z
        Computes the (beta) coefficients and the bandwidths.

        Minimizes ``cv_loo`` with respect to ``b`` and ``bw``.
        r    r   rp   N)r#   r,   r-   r   rW   r   rr   rs   ru   s        r   rn   zSemiLinear._est_b_bwK  sj     )##$-$&*@)C#DD}T[':::T]"#$-..!"ur   c           
         t          j        |          }|d| j                 }|| j        d         }t          | j                  }t          | j                                                  }t          | j                                                  }t          j        | j        |          dddf         }d}t          |          D ]\  }	}
t          |          }t          |          }t          j        |
|          dddf         }||z
  }|                     ||| | j        |	ddf                    d         }||	ddf         }|| j        |	         |z
  |z
  dz  z  }|S )a  
        Similar to the cross validation leave-one-out estimator.

        Modified to reflect the linear components.

        Parameters
        ----------
        params : array_like
            Vector consisting of the coefficients (b) and the bandwidths (bw).
            The first ``k_linear`` elements are the coefficients.

        Returns
        -------
        L : float
            The value of the objective function

        References
        ----------
        See p.254 in [1]
        r   Nry   rH   )r#   rz   r   r   r   r   rI   r   r{   rJ   rK   rm   )r   r|   r1   r   r}   r~   LOO_ZXbr   iirU   r/   ZXb_jYxr   lts                    r   rs   zSemiLinear.cv_looX  so   * F##1t}$%DMNN#DI&&DJ''0022D344==??VDIq!!!!!D&)$U++ 	0 	0KBUAUA6'1%%aaaf-DTB		"BaR(,(?AAA(F'F  H HHIKABEB$*R.2%)a//AAr   Nc           
      L   || j         }nt          || j                  }|| j        }nt          || j                  }t          j        |          d         }t          j        |f          }t          j        || j        f          }| j        t          j	        || j
                  dddf         z
  }t          |          D ]]}|                     | j        || j        ||ddf                   }|d         ||<   t          j        |d                   }	|	||ddf<   ^||fS )z+Computes fitted values and marginal effectsNr   r   r   )r   r
   r   r   rW   r#   r$   r)   r   r{   r1   r*   rm   r   rL   )
r   exog_predictexog_nonparametric_predictr   r%   r   r/   rT   r   r   s
             r   r   zSemiLinear.fit  s8    9LL(t}EEL%-)-)@&&)67QSWSY)Z)Z&"<==a@x)**h/00Jdf55aaaf==~&& 	 	Ayy!T-D.HAAA.N ! P PHqkDGJx{++EC111IISyr   c                     d}|dt          | j                  z   dz   z  }|dt          | j                  z   dz   z  }|d| j        z   dz   z  }|dz  }|dz  }|S )r   z'Semiparamatric Partially Linear Model 
r   r   zNumber of samples:   N = r   r   r   r   r   s     r   r   zSemiLinear.__repr__  ss    9+c$&kk9D@@+c$)nn<tCC'$-7$>>3377r   )NNr   r`   r   r   r   r     sp        , ,\+ + +   ' ' 'R   4    r   r   )r_   numpyr#   scipyr   scipy.stats.mstatsr   statsmodels.nonparametric.apir   r   &statsmodels.nonparametric._kernel_baser   r   r	   r
   __all__r   r   r   r`   r   r   <module>r      sD   >           ) ) ) ) ) ) D D D D D D D D4 4 4 4 4 4 4 4 4 4 4 4 :
9
9l l l l l l l l^n n n n ny n n nbW W W W W W W W W Wr   