
    M/Ph/                        d Z ddlmZ ddlZddlmZmZ ddlm	Z	 ddl
mZmZmZ ddlmZmZmZ ddlmc mZ ddlmZ dd	lmZ dZd
 Z G d de          Z G d de          ZdZ G d de          Z G d de          Z G d de          Z  G d de           Z! G d de           Z" G d de          Z#d dZ$ G d de          Z%ee#edZ&dS )!a{  Generalized Method of Moments, GMM, and Two-Stage Least Squares for
instrumental variables IV2SLS



Issues
------
* number of parameters, nparams, and starting values for parameters
  Where to put them? start was initially taken from global scope (bug)
* When optimal weighting matrix cannot be calculated numerically
  In DistQuantilesGMM, we only have one row of moment conditions, not a
  moment condition for each observation, calculation for cov of moments
  breaks down. iter=1 works (weights is identity matrix)
  -> need method to do one iteration with an identity matrix or an
     analytical weighting matrix given as parameter.
  -> add result statistics for this case, e.g. cov_params, I have it in the
     standalone function (and in calc_covparams which is a copy of it),
     but not tested yet.
  DONE `fitonce` in DistQuantilesGMM, params are the same as in direct call to fitgmm
      move it to GMM class (once it's clearer for which cases I need this.)
* GMM does not know anything about the underlying model, e.g. y = X beta + u or panel
  data model. It would be good if we can reuse methods from regressions, e.g.
  predict, fitted values, calculating the error term, and some result statistics.
  What's the best way to do this, multiple inheritance, outsourcing the functions,
  mixins or delegation (a model creates a GMM instance just for estimation).


Unclear
-------
* dof in Hausman
  - based on rank
  - differs between IV2SLS method and function used with GMM or (IV2SLS)
  - with GMM, covariance matrix difference has negative eigenvalues in iv example, ???
* jtest/jval
  - I'm not sure about the normalization (multiply or divide by nobs) in jtest.
    need a test case. Scaling of jval is irrelevant for estimation.
    jval in jtest looks to large in example, but I have no idea about the size
* bse for fitonce look too large (no time for checking now)
    formula for calc_cov_params for the case without optimal weighting matrix
    is wrong. I do not have an estimate for omega in that case. And I'm confusing
    between weights and omega, which are *not* the same in this case.



Author: josef-pktd
License: BSD (3-clause)

    )lrangeN)optimizestats)approx_fprime)ModelLikelihoodModelLikelihoodModelResults)OLSRegressionResultsRegressionResultsWrapper)cache_readonly)
_ensure_2dc                 N    t          j        |                                           S )z'just a shortcut to np.abs(x).max()
    )npabsmax)xs    b/var/www/html/test/jupyter/venv/lib/python3.11/site-packages/statsmodels/sandbox/regression/gmm.pymaxabsr   D   s     6!99==??    c                   >     e Zd ZdZd fd	Zd Zd Zd ZddZ xZ	S )	IV2SLSaU  
    Instrumental variables estimation using Two-Stage Least-Squares (2SLS)


    Parameters
    ----------
    endog : ndarray
       Endogenous variable, 1-dimensional or 2-dimensional array nobs by 1
    exog : ndarray
       Explanatory variables, 1-dimensional or 2-dimensional array nobs by k
    instrument : ndarray
       Instruments for explanatory variables. Must contain both exog
       variables that are not being instrumented and instruments

    Notes
    -----
    All variables in exog are instrumented in the calculations. If variables
    in exog are not supposed to be instrumented, then these variables
    must also to be included in the instrument array.

    Degrees of freedom in the calculation of the standard errors uses
    `df_resid = (nobs - k_vars)`.
    (This corresponds to the `small` option in Stata's ivreg2.)
    Nc                 2   t          |d          \  | _        | _        t                                          ||           | j        j        d         | j        j        d         z
  | _        t          | j        j        d         | j	        z
            | _
        d S )NTr      )r   
instrumentinstrument_namessuper__init__exogshapedf_residfloat
k_constantdf_model)selfendogr   r   	__class__s       r   r   zIV2SLS.__init__d   sy    1;J1M1M..%%% 	*TY_Q-??dioa04?BCCr   c                 6    | j         | _        | j        | _        d S N)r&   wendogr   wexogr%   s    r   
initializezIV2SLS.initializen   s    jY


r   c                     dS )zNot implementedN )r%   Xs     r   whitenzIV2SLS.whitenr   s    r   c                 R   | j         | j        | j        }}}t          j        |j        |          }t          j        |j        |          }t          j                            ||          x| _        }t          j        ||          x}}t          j        |j        |          }	|	| _	        t          j        |j        |          }
t          j        |j        |          }t          j                            |	|          }t          j        
                    |
          }t          j        |j        t          j        |	|                    | _        t          | || j                  }||_        ||_        t          ||                                          | _        t%          |          S )a  estimate model using 2SLS IV regression

        Returns
        -------
        results : instance of RegressionResults
           regression result

        Notes
        -----
        This returns a generic RegressioResults instance as defined for the
        linear models.

        Parameter estimates and covariance are correct, but other results
        have not been tested yet, to see whether they apply without changes.

        )normalized_cov_params)r&   r   r   r   dotTlinalgsolve
xhatparamsxhatprodinvr3   IVRegressionResultsexog_hat_paramsexog_hatr
   fit_results_ols2ndr   )r%   yr   zztzztxr8   FxhatFtFFtxFtyparamsFtxinvlfits                  r   r>   z
IV2SLS.fitv   sK   & 
DItA!fQS!nnfQS!nn')ysC'@'@@*6!Z(((DfQS!nnfQS!nnfQS!nnc**s##%'VFHbfS&6I6I%J%J""4-1-GI I I  *"1d||//11'---r   c                 >    || j         }t          j        ||          S )a  
        Return linear predicted values from a design matrix.

        Parameters
        ----------
        exog : array_like
            Design / exogenous data
        params : array_like, optional after fit has been called
            Parameters of a linear model

        Returns
        -------
        An array of fitted values

        Notes
        -----
        If the model as not yet been fit, params is not optional.
        r   r   r4   r%   rI   r   s      r   predictzIV2SLS.predict   s"    & <9DvdF###r   r)   )
__name__
__module____qualname____doc__r   r-   r1   r>   rO   __classcell__r'   s   @r   r   r   J   s         2D D D D D D    ). ). ).X$ $ $ $ $ $ $ $r   r   c                   8    e Zd ZdZed             ZddZddZdS )	r;   a1  
    Results class for for an OLS model.

    Most of the methods and attributes are inherited from RegressionResults.
    The special methods that are only available for OLS are:

    - get_influence
    - outlier_test
    - el_test
    - conf_int_el

    See Also
    --------
    RegressionResults
    c                     | j         j        j        }|t          j        S t          | j                  }t          j        |          }t          |          }||= | 	                    ||                   j
        }|S r)   )modeldata	const_idxr   nanlenrI   eyer   f_testfvalue)r%   rZ   k_varsrestrictionidx_noconstantfvals         r   r_   zIVRegressionResults.fvalue   sh    JO-	6M%%F&..K#F^^Ny);;{>:;;BDKr   Nc                 R   | j         j        | j         j        }}t          ||                                          }|j         j        }|j        t          |          z  }| j        |j        z
  }t          j
                            | j         j                  |z
  }|st          j
                            |          }t          j
                            |          }	t          j        |t          j        |	|                    |z  }
t          j                            |
|          }|
||fS )zHausman's specification test

        See Also
        --------
        spec_hausman : generic function for Hausman's specification test

        )rX   r&   r   r
   r>   r3   ssrr\   rI   r   r6   pinvr9   matrix_rankr4   r   chi2sf)r%   dofr&   r   resolsnormalized_cov_params_olsse2params_diffcov_diffcov_diffpinvHpvals               r   spec_hausmanz IVRegressionResults.spec_hausman   s     j&
tUD!!%%''$*L$F! j3u::%kFM19>>$*"5669RR  	2)''11Cy~~h//F;|[ A ABB3Fz}}Q$$$|r   皙?c                    ddl m}m}m}  || j                  \  }}	}
} || j                  \  }}| j        j        }t          j        	                    t          j
        |j        |                    }t          j        |          }t          j        |d         |d         z            }t          ||	|
|||||d                   | _        ddddgfd	d
gfdddddg	}dd| j        z  gfdd| j        z  gfdd| j        z  gfdd| j        z  gfg}dd|z  gfdd|z  gfdd|
z  gfdd|z  gfg}dd || j                  z  gfdd|z  gfdd|	z  gfd d|z  gfg}|| j        j        j        d"z   d#z   }dd$lm}  |            }|                    | |||||%           |                    | |||d&'           |                    | ||||d	%           |S )(  Summarize the Regression Results

        Parameters
        ----------
        yname : str, optional
            Default is `y`
        xname : list[str], optional
            Default is `var_##` for ## in p the number of regressors
        title : str, optional
            Title for the top table. If not None, then this replaces the
            default title
        alpha : float
            significance level for the confidence intervals

        Returns
        -------
        smry : Summary instance
            this holds the summary tables and text, which can be printed or
            converted to various output formats.

        See Also
        --------
        statsmodels.iolib.summary.Summary : class to hold summary
            results
        r   )jarque_beraomni_normtestdurbin_watson)jbjbpvskewkurtosisomniomnipvcondno	mineigvalzDep. Variable:NzModel:NMethod:z	Two Stage zLeast SquareszDate:NzTime:NzNo. Observations:N)zDf Residuals:N)z	Df Model:Nz
R-squared:z%#8.3fzAdj. R-squared:zF-statistic:%#8.4gzProb (F-statistic):%#6.3gzOmnibus:z%#6.3fzProb(Omnibus):zSkew:z	Kurtosis:zDurbin-Watson:zJarque-Bera (JB):z	Prob(JB):z%#8.3gz	Cond. No.N zRegression ResultsSummarygleftgrightynamexnametitleTr   r   alphause_t)statsmodels.stats.stattoolsrw   rx   ry   wresidrX   r+   r   r6   eigvalshr4   r5   sortsqrtdictdiagnrsquaredrsquared_adjr_   f_pvaluer'   rP   statsmodels.iolib.summaryr   add_table_2colsadd_table_params)r%   r   r   r   r   rw   rx   ry   r{   r|   r}   r~   r   r   r+   eigvalsr   top_left	top_right
diagn_leftdiagn_rightr   smrys                          r   summaryzIVRegressionResults.summary   s   8	. 	. 	. 	. 	. 	. 	. 	. 	. 	.#.;t{#;#; D$$}T[11f 
 )$$RVEGU%;%;<<''""WQZ/00
 Rd#F6$+AJ0 0 0
 -$./*+##/+'	 #X%=$>?'(T5F*F)GH$x$+'=&>@+h.F-GH	 "HtO#45'(V*;)<=D 12"X%8$9:
 )8mmDK6P6P+P*QR+hm_=#ho%67#h&7%89 =J(1C7:NNE 	655555wyyT) %U% 	 	A 	A 	Ad%uE#' 	 	) 	) 	) 	TK %U " 	 	$ 	$ 	$ r   r)   NNNrt   )rP   rQ   rR   rS   r   r_   rs   r   r/   r   r   r;   r;      sh             ^   Bi i i i i ir   r;   a  
Options for GMM
---------------

Type of GMM
~~~~~~~~~~~

 - one-step
 - iterated
 - CUE : not tested yet

weight matrix
~~~~~~~~~~~~~

 - `weights_method` : str, defines method for robust
   Options here are similar to :mod:`statsmodels.stats.robust_covariance`
   default is heteroscedasticity consistent, HC0

   currently available methods are

   - `cov` : HC0, optionally with degrees of freedom correction
   - `hac` :
   - `iid` : untested, only for Z*u case, IV cases with u as error indep of Z
   - `ac` : not available yet
   - `cluster` : not connected yet
   - others from robust_covariance

other arguments:

 - `wargs` : tuple or dict, required arguments for weights_method

   - `centered` : bool,
     indicates whether moments are centered for the calculation of the weights
     and covariance matrix, applies to all weight_methods
   - `ddof` : int
     degrees of freedom correction, applies currently only to `cov`
   - maxlag : int
     number of lags to include in HAC calculation , applies only to `hac`
   - others not yet, e.g. groups for cluster robust

covariance matrix
~~~~~~~~~~~~~~~~~

The same options as for weight matrix also apply to the calculation of the
estimate of the covariance matrix of the parameter estimates.
The additional option is

 - `has_optimal_weights`: If true, then the calculation of the covariance
   matrix assumes that we have optimal GMM with :math:`W = S^{-1}`.
   Default is True.
   TODO: do we want to have a different default after `onestep`?


c                        e Zd ZdZdZ	 	 d fd	Zd ZddZddZ	 	 	 	 ddZ	ddZ
d dZd!dZd Z	 	 d"dZ	 	 	 d#dZ	 	 d$dZd Zd%dZd&dZd&dZ xZS )'GMMa  
    Class for estimation by Generalized Method of Moments

    needs to be subclassed, where the subclass defined the moment conditions
    `momcond`

    Parameters
    ----------
    endog : ndarray
        endogenous variable, see notes
    exog : ndarray
        array of exogenous variables, see notes
    instrument : ndarray
        array of instruments, see notes
    nmoms : None or int
        number of moment conditions, if None then it is set equal to the
        number of columns of instruments. Mainly needed to determine the shape
        or size of start parameters and starting weighting matrix.
    kwds : anything
        this is mainly if additional variables need to be stored for the
        calculations of the moment conditions

    Attributes
    ----------
    results : instance of GMMResults
        currently just a storage class for params and cov_params without it's
        own methods
    bse : property
        return bse



    Notes
    -----
    The GMM class only uses the moment conditions and does not use any data
    directly. endog, exog, instrument and kwds in the creation of the class
    instance are only used to store them for access in the moment conditions.
    Which of this are required and how they are used depends on the moment
    conditions of the subclass.

    Warning:

    Options for various methods have not been fully implemented and
    are still missing in several methods.


    TODO:
    currently onestep (maxiter=0) still produces an updated estimate of bse
    and cov_params.

    
GMMResultsNnonec                    |                      ||          }t                                          ||||           |j        d         | _        ||| _        n&||j        d         | _        nt          j        | _        ||| _        n&||j        d         | _        nt          j        | _        | j	        
                    |           d| _        dS )z
        maybe drop and use mixin instead

        TODO: GMM does not really care about the data, just the moment conditions
        )missingr   r   Nr   ư>)_check_inputsr   r   r    nobsnmomsr   r[   k_params__dict__updateepsilon_iter)	r%   r&   r   r   k_momsr   r   kwdsr'   s	           r   r   zGMM.__init__  s     ''
E::
g% 	 	' 	' 	'
 KN	DJJ##)!,DJJDJ$DMM# JqMDMMFDMT""" r   c                     |?t          j        |          }|j        d         |j        d         k    rt          d          |S )Nr   z*instrument is not the same length as endog)r   asarrayr    
ValueError)r%   r   r&   offsets       r   r   zGMM._check_inputs  sC    !Z
++F|A%+a.00 !MNNNr   c                    | j         j        }|=t          |          t          |          k    r|| j         _        d S t          d          t          |          t          |          k     r$|t          |           d          | j         _        d S t          |          t          |          k    r2d t	          t          |                    D             | j         _        d S d S )N param_names has the wrong lengthc                     g | ]}d |z  S )zp%2dr/   ).0is     r   
<listcomp>z(GMM._fix_param_names.<locals>.<listcomp>  s    #K#K#K1FQJ#K#K#Kr   )rY   xnamesr\   r   range)r%   rI   param_namesr   s       r   _fix_param_nameszGMM._fix_param_names  s    !"6{{c+....#.	    !CDDD 6{{S[[((#)3v;;,--#8	   Vs6{{**#K#Kc&kk8J8J#K#K#K	    +*r   c                     ||| _         n| j         }|t          |          k    r|| j        _        dS t	          d          )a  set the parameter names in the model

        Parameters
        ----------
        param_names : list[str]
            param_names should have the same length as the number of params
        k_params : None or int
            If k_params is None, then the k_params attribute is used, unless
            it is None.
            If k_params is not None, then it will also set the k_params
            attribute.
        Nr   )r   r\   rY   r   r   )r%   r   r   s      r   set_param_nameszGMM.set_param_names  sL     $DMM}Hs;''''*DI?@@@r   
   covr/   Tbfgsc	           	      f   |}	|	|                                  }	|| |i }d|vrd|d<   |dk    s|dk    rT| t          j                            |          }
n|                     d          }
|                     |	|
||          }|
}n>|                     |	||||||	          \  }}
t          j                            |
          }|dk    r|                     |||
          }| j        }
|||d}| 	                    |d           t          | j                 | ||
|||          }|| _        |S )ac  
        Estimate parameters using GMM and return GMMResults

        TODO: weight and covariance arguments still need to be made consistent
        with similar options in other models,
        see RegressionResult.get_robustcov_results

        Parameters
        ----------
        start_params : array (optional)
            starting value for parameters ub minimization. If None then
            fitstart method is called for the starting values.
        maxiter : int or 'cue'
            Number of iterations in iterated GMM. The onestep estimate can be
            obtained with maxiter=0 or 1. If maxiter is large, then the
            iteration will stop either at maxiter or on convergence of the
            parameters (TODO: no options for convergence criteria yet.)
            If `maxiter == 'cue'`, the the continuously updated GMM is
            calculated which updates the weight matrix during the minimization
            of the GMM objective function. The CUE estimation uses the onestep
            parameters as starting values.
        inv_weights : None or ndarray
            inverse of the starting weighting matrix. If inv_weights are not
            given then the method `start_weights` is used which depends on
            the subclass, for IV subclasses `inv_weights = z'z` where `z` are
            the instruments, otherwise an identity matrix is used.
        weights_method : str, defines method for robust
            Options here are similar to :mod:`statsmodels.stats.robust_covariance`
            default is heteroscedasticity consistent, HC0

            currently available methods are

            - `cov` : HC0, optionally with degrees of freedom correction
            - `hac` :
            - `iid` : untested, only for Z*u case, IV cases with u as error indep of Z
            - `ac` : not available yet
            - `cluster` : not connected yet
            - others from robust_covariance

        wargs` : tuple or dict,
            required and optional arguments for weights_method

            - `centered` : bool,
              indicates whether moments are centered for the calculation of the weights
              and covariance matrix, applies to all weight_methods
            - `ddof` : int
              degrees of freedom correction, applies currently only to `cov`
            - `maxlag` : int
              number of lags to include in HAC calculation , applies only to `hac`
            - others not yet, e.g. groups for cluster robust

        has_optimal_weights: If true, then the calculation of the covariance
              matrix assumes that we have optimal GMM with :math:`W = S^{-1}`.
              Default is True.
              TODO: do we want to have a different default after `onestep`?
        optim_method : str, default is 'bfgs'
            numerical optimization method. Currently not all optimizers that
            are available in LikelihoodModels are connected.
        optim_args : dict
            keyword arguments for the numerical optimizer.

        Returns
        -------
        results : instance of GMMResults
            this is also attached as attribute results

        Notes
        -----

        Warning: One-step estimation, `maxiter` either 0 or 1, still has
        problems (at least compared to Stata's gmm).
        By default it uses a heteroscedasticity robust covariance matrix, but
        uses the assumption that the weight matrix is optimal.
        See options for cov_params in the results instance.

        The same options as for weight matrix also apply to the calculation of
        the estimate of the covariance matrix of the parameter estimates.

        Ndispr   r   cueFr:   weightsoptim_method
optim_args)maxiterstart_invweightsweights_methodwargsr   r   )r   r   )r   has_optimal_weightsr   )r   )rX   rI   r   r   options_otherr   )fitstartr   r6   rf   start_weightsfitgmmfititer	fitgmm_cu_weights_cur   results_class_dictresults_classresults)r%   start_paramsr   inv_weightsr   r   r   r   r   startr   rI   weights_r   r   s                  r   r>   zGMM.fit+  s   x =MMOOEKJ##!"Jva<<7e++&)..55 ,,,77[[.:z ! S SFHH"ll53:<G:H168D6@ + B BOFG y~~g..He ^^F2>0: $ < <F &G +9/B(46 6
 	f$777$T%78041729058E5?A A A r   c                 <   ||                      d          }|i }|dk    rt          j        }n|dk    rt          j        }| j        |d<   n|dk    rt          j        }| j        |d<   no|dk    rt          j        }| j        |d<   nR|d	k    rt          j        }| j        |d<   n5|d
k    rt          j        }n"|dk    rt          j	        }nt          d          t          r,t          t          j                            |                      || j        |fd|fi|S )a  estimate parameters using GMM

        Parameters
        ----------
        start : array_like
            starting values for minimization
        weights : ndarray
            weighting matrix for moment conditions. If weights is None, then
            the identity matrix is used


        Returns
        -------
        paramest : ndarray
            estimated parameters

        Notes
        -----
        todo: add fixed parameter option, not here ???

        uses scipy.optimize.fmin

        NFr   nmr   fprimencgcgfmin_l_bfgs_bpowellslsqpoptimizer method not availableargs)r   r   fmin	fmin_bfgsscorefmin_ncgfmin_cgr   fmin_powell
fmin_slsqpr   DEBUGprintr   r6   detgmmobjective)r%   r   r   r   r   	optimizers         r   r   z
GMM.fitgmm  sV   8 ?((U(33GJ4 IIV## *I#':Jx  U"" )I#':Jx  T!! (I#':Jx  _,, .I#':Jx  X%% ,IIW$$ +II=>>> 	*")--(())) y*E ' '
 '%' ' 	'r   c                     |i }|dk    rt           j        }n?|dk    rt           j        }| j        |d<   n"|dk    rt           j        }nt          d           || j        |fddi|S )	a  estimate parameters using continuously updating GMM

        Parameters
        ----------
        start : array_like
            starting values for minimization

        Returns
        -------
        paramest : ndarray
            estimated parameters

        Notes
        -----
        todo: add fixed parameter option, not here ???

        uses scipy.optimize.fmin

        Nr   r   r   r   r   r   r/   )r   r   r   score_cur   r   gmmobjective_cu)r%   r   r   r   r   s        r   r   zGMM.fitgmm_cu  s    . J4 IIV## *I#'=Jx  U"" )II=>>> y-uLL2LLLLr   c                 4    t          j        | j                  S )z+Create identity matrix for starting weights)r   r]   r   )r%   r:   s     r   r   zGMM.start_weights+  s    vdj!!!r   c                 |    |                      |          }t          j        t          j        ||          |          S )aL  
        objective function for GMM minimization

        Parameters
        ----------
        params : ndarray
            parameter values at which objective is evaluated
        weights : ndarray
            weighting matrix

        Returns
        -------
        jval : float
            value of objective function

        )momcond_meanr   r4   )r%   rI   r   momss       r   r   zGMM.gmmobjective/  s5    "   ((vbfT7++T222r   c                 D   |                      |          }|                     |||          }t          j                            |          }|| _        t          j        t          j        |                    d          |          |                    d                    S )a,  
        objective function for continuously updating  GMM minimization

        Parameters
        ----------
        params : ndarray
            parameter values at which objective is evaluated

        Returns
        -------
        jval : float
            value of objective function

        )r   r   r   )momcondcalc_weightmatrixr   r6   rf   r   r4   mean)r%   rI   r   r   r  r   r   s          r   r  zGMM.gmmobjective_cuF  s      ||F##,,T.38 - : :)..--"vbfTYYq\\733TYYq\\BBBr      c                    g | _         | j        }||                     d          }	n|}	|	}
t          |          D ]}|
}t          j                            |          }	|                     ||	||          } ||          }|                     ||||          }
|dk    rt          ||z
            | j
        k     r n|}||	fS )a  iterative estimation with updating of optimal weighting matrix

        stopping criteria are maxiter or change in parameter estimate less
        than self.epsilon_iter, with default 1e-6.

        Parameters
        ----------
        start : ndarray
            starting value for parameters
        maxiter : int
            maximum number of iterations
        start_weights : array (nmoms, nmoms)
            initial weighting matrix; if None, then the identity matrix
            is used
        weights_method : {'cov', ...}
            method to use to estimate the optimal weighting matrix,
            see calc_weightmatrix for details

        Returns
        -------
        params : ndarray
            estimated parameters
        weights : ndarray
            optimal weighting matrix calculated with final parameter
            estimates

        Notes
        -----




        NTr   r   r   r   rI   r  )historyr	  r   r   r   r6   rf   r   r
  r   r   )r%   r   r   r   r   r   r   r   r	  wwinv_newitwinvresgmmr  s                  r   r   zGMM.fititer^  s    H ,#""t",,AA A
 .. 	 	BD	t$$A [[,6 ! 8 8F 76??D--d=K49& . J JH Avv&%0043DDD EEqyr   c           	      .   |j         \  }}t          rt          d|           d|v o|d           }|s|}n||                                z
  }|dk    rmt	          j        |j        |          }	d|v rH|d         dk    r|	|| j        z
  z  }	n
t          rt          d|d                    |	||d         z
  z  }	n|	|z  }	n|dk    rd|vrt          d	          |d         }
t	          j	        |
d
z             }t	          j        |j        |          |z  }	t          d
|
d
z             D ]?}|	||         t	          j        ||d         j        |d|                    z  ||z
  z  z  }	@n,|dk    rC|d         }
d|v r	|d         }nt          j        }||d<   t          j        ||
|          }	|	|z  }	n|dk    r|                     |          }|r||                    d          z  }| j        }t	          j        |j        |                              t	          j        |j        |                    |z  }	d|v rF|d         dk    r|	|| j        z
  z  }	nAt          rt          d|d                    |	||d         z
  z  }	n|	|z  }	nt          d          |	S )a>  
        calculate omega or the weighting matrix

        Parameters
        ----------
        moms : ndarray
            moment conditions (nobs x nmoms) for all observations evaluated at
            a parameter value
        weights_method : str 'cov'
            If method='cov' is cov then the matrix is calculated as simple
            covariance of the moment conditions.
            see fit method for available aoptions for the weight and covariance
            matrix
        wargs : tuple or dict
            parameters that are required by some kernel methods to
            estimate the long-run covariance. Not used yet.

        Returns
        -------
        w : array (nmoms, nmoms)
            estimate for the weighting matrix or covariance of the moment
            condition


        Notes
        -----

        currently a constant cutoff window is used
        TODO: implement long-run cov estimators, kernel-based

        Newey-West
        Andrews
        Andrews-Moy????

        References
        ----------
        Greene
        Hansen, Bruce

        z momcov wargscenteredr   ddofr   z momcov ddof
flatkernelmaxlagzflatkernel requires maxlagr   Nhackernel)nlagsweights_funciidr   zweight method not available)r    r   r   r  r   r4   r5   r   r   onesr   smcovweights_bartlettS_hac_simple	get_errorr   )r%   r  r   r   rI   r   r   r  moms_r  r  hr   r  ur   s                   r   r
  zGMM.calc_weightmatrix  s   T zf 	*/5)))"e+EE*4E0EF 	'EE499;;&E U""uw&&A=J..$./AA =neFm<<<$v./AA T	|++ u$$ !=>>>8_F
##Auw&&t+A1VAX&& I IadRVE!""IKss<<<QGHI u$$8_F5  $X$5".h"50<> > >AIAAu$$ v&&A  QVVAYYJz|Z0044RVAC^^DDtKA=J..$./AA  =neFm<<<$v./AA T	 :;;;r   c                 ~    |                      |          }|j        \  | _        | _        |                    d          S )z-
        mean of moment conditions,

        r   )r	  r    	nobs_momsr   r  )r%   rI   r	  s      r   r  zGMM.momcond_mean(  s5     ,,v&&&-m#||Ar   -C6?c                     | j         }|r*t          |||          t          |||           z   dz  }nt          |||          }|S )a=  gradient of moment conditions

        Parameters
        ----------
        params : ndarray
            parameter at which the moment conditions are evaluated
        epsilon : float
            stepsize for finite difference calculation
        centered : bool
            This refers to the finite difference calculation. If `centered`
            is true, then the centered finite difference calculation is
            used. Otherwise the one-sided forward differences are used.

        TODO: looks like not used yet
              missing argument `weights`

        )epsilonr  )r  r   )r%   rI   r+  r  r	  gradmomss         r   gradient_momcondzGMM.gradient_momcond3  sk    & #  	G%fgwGGG!&'G8DDDEFGHHH %VWgFFFHr   c                 :    t          || j        |f||          }|S )Scorer   r  r+  )r   r   )r%   rI   r   r+  r  derivs         r   r   z	GMM.scoreQ  s0    fd&7wj'/B B B r   c                 8    t          || j        d||          }|S )zScore cur/   r0  )r   r  )r%   rI   r+  r  r1  s        r   r  zGMM.score_cuX  s.    fd&:'/B B B r   )NNr   r)   )Nr   Nr   r/   Tr   N)Nr   N)r   NT)r   r/   )r  Nr   r/   r   N)r   r/   N)r)  T)NT)rP   rQ   rR   rS   r   r   r   r   r   r>   r   r   r   r   r  r   r
  r  r-  r   r  rT   rU   s   @r   r   r     s       2 2h !MFJ! ! ! ! ! !>  L L L L$A A A A0 >B.0&*26W W W Wr=' =' =' ='@%M %M %M %MN" " " "3 3 3. 6; C C C C0 :>AG#E E E EP CE!%   D     <          r   r   c                       e Zd ZdZdZd Zed             Zed             Zd Z		 	 	 ddZ
ed             Zd Zd Zd ZddZdS )r   zjust a storage class right nowFc                     | j                             |           | j        j        | _        t          j        | _        |                                 | _        d S r)   )	r   r   rX   r   r   infr!   _cov_paramscov_params_default)r%   r   r   s      r   r   zGMMResults.__init__f  sF    T"""JO	"&"2"2"4"4r   c                 L    | j                             | j        | j                  S )zObjective function at params)rX   r   rI   r   r,   s    r   qzGMMResults.qn  s      z&&t{DLAAAr   c                 *    | j         | j        j        z  S )z"nobs_moms attached by momcond_mean)r:  rX   r(  r,   s    r   jvalzGMMResults.jvals  s     v
,,,r   c                    d|vr
| j         |d<   d|vr| j        d         |d<   d|vr| j        d         |d<   | j                            | j                  }| j                            | j                  } | j        ||fi |}|S )Nr   r   r   )r   r   rX   r-  rI   r	  calc_cov_params)r%   r   r,  r  	covparamss        r   r7  zGMMResults._cov_paramsx  s     $ JDM4''%)%78H%ID!" ,,*.*<=R*SD&':..t{;;z!!$+..(D(x@@4@@	r   NTr   r/   c                    |j         d         }|| j        }n	 |r|}	n#| j                            |||| j                  }	|rht
          j                            t          j        |j	        t          j        t
          j                            |	          |                              }
nt          j        |j	        |          }t
          j                            t          j        ||                    }t          j        t          j        |t          j        t          j        ||	          |j	                            |          }
|
|z  S )a  calculate covariance of parameter estimates

        not all options tried out yet

        If weights matrix is given, then the formula use to calculate cov_params
        depends on whether has_optimal_weights is true.
        If no weights are given, then the weight matrix is calculated with
        the given method, and has_optimal_weights is assumed to be true.

        (API Note: The latter assumption could be changed if we allow for
        has_optimal_weights=None.)

        r   Nr  )
r    r   rX   r
  rI   r   r6   r:   r4   r5   )r%   r  r,  r   use_weightsr   r   r   r   omegahatr   gwgwginvs                r   r>  zGMMResults.calc_cov_params  s+   " z!}?
 lGG  	DHHz3304?M6;7;{	 4 D DH  	U)--xz$&F29==+B+BH$M$M!O !O P PCC 
G,,BY]]26"h#7#788F&rvb(/C/CRT(J(JKKVTTC 4xr   c                 *    |                                  S )z2standard error of the parameter estimates
        )get_bser,   s    r   bse_zGMMResults.bse_  s     ||~~r   c           	      d    t          j        t          j         | j        di |                    S )a*  standard error of the parameter estimates with options

        Parameters
        ----------
        kwds : optional keywords
            options for calculating cov_params

        Returns
        -------
        bse : ndarray
            estimated standard error of parameter estimates

        r/   )r   r   diag
cov_params)r%   r   s     r   rF  zGMMResults.get_bse  s0     wrwt666677888r   c                     | j         }| j        j        }| j        j        |z
  }|t
          j                            ||          |fS )zoveridentification test

        I guess this is missing a division by nobs,
        what's the normalization in jval ?
        )r<  rI   sizerX   r   r   rh   ri   )r%   jstatnparamsdfs       r   jtestzGMMResults.jtest  sA     	+"Z'ejmmE2..22r   c                     | j         }| j        j        }|j         }|j        j        }||z
  }||z
  }|dk     r| }| }|t          j                            ||          |fS )a  overidentification test for comparing two nested gmm estimates

        This assumes that some moment restrictions have been dropped in one
        of the GMM estimates relative to the other.

        Not tested yet

        We are comparing two separately estimated models, that use different
        weighting matrices. It is not guaranteed that the resulting
        difference is positive.

        TODO: Check in which cases Stata programs use the same weigths

        r   )r<  rX   r   r   rh   ri   )r%   otherjstat1k_moms1jstat2k_moms2jdiffrO  s           r   	compare_jzGMMResults.compare_j  so     *"+#w66 BGEejmmE2..22r   rt   c                 2   |                                  \  }}}ddddgfdddg}dd	|z  gfd
d|z  gfg}	|| j        j        j        dz   dz   }ddlm}
  |
            }|                    | ||	|||           |                    | |||| j                   |S )rv   r   r   r   r   r   r   r   z	Hansen J:r   zProb (Hansen J):r   Nr   Resultsr   r   r   r   )	rP  rX   r'   rP   r   r   r   r   r   )r%   r   r   r   r   jvaluejpvaluejdfr   r   r   r   s               r   r   zGMMResults.summary  s    8  $zz||,$(##/ "Hv$5#68(8g+=*>?		 =J(1C7)CE 	655555wyyT)#(U 	 	D 	D 	Dd%uE$(J 	 	0 	0 	0 r   )NFTr   r/   r   )rP   rQ   rR   rS   r   r   r   r:  r<  r7  r>  propertyrG  rF  rP  rX  r   r/   r   r   r   r   a  s        ((E5 5 5 B B ^B - - ^-  4 INBFJL2 2 2 2h   X
9 9 9 
3 
3 
33 3 38> > > > > >r   r   c                   8    e Zd ZdZdZd Zd
dZd ZddZd	 Z	dS )IVGMMaS  
    Basic class for instrumental variables estimation using GMM

    A linear function for the conditional mean is defined as default but the
    methods should be overwritten by subclasses, currently `LinearIVGMM` and
    `NonlinearIVGMM` are implemented as subclasses.

    See Also
    --------
    LinearIVGMM
    NonlinearIVGMM

    IVGMMResultsc                 J    t          j        | j        j        d                   S )zCreate array of zerosr   r   zerosr   r    r,   s    r   r   zIVGMM.fitstartX      x	*+++r   Tc                     t          j        | j        j        | j                  }| j        j        d         }|r||z  S t           j                            ||z            S )zStarting weightsr   )r   r4   r   r5   r    r6   rf   )r%   r:   zzr   s       r   r   zIVGMM.start_weights\  sS    VDO%t77$Q' 	-99>>"t),,,r   c                 <    | j         |                     |          z
  S )zGet error at params)r&   rO   )r%   rI   s     r   r#  zIVGMM.get_errore  s    zDLL0000r   Nc                 >    || j         }t          j        ||          S )zGet prediction at paramsrM   rN   s      r   rO   zIVGMM.predicti  s     <9DvdF###r   c                 T    | j         }||                     |          dddf         z  S )zError times instrumentN)r   r#  )r%   rI   r   s      r   r	  zIVGMM.momcondp  s-    _
DNN622111d7;;;r   r3  r)   )
rP   rQ   rR   rS   r   r   r   r#  rO   r	  r/   r   r   r`  r`  G  sz          #M, , ,- - - -1 1 1$ $ $ $< < < < <r   r`  c                   .    e Zd ZdZddZddZd Zd ZdS )	LinearIVGMMa5  class for linear instrumental variables models estimated with GMM

    Uses closed form expression instead of nonlinear optimizers for each step
    of the iterative GMM.

    The model is assumed to have the following moment condition

        E( z * (y - x beta)) = 0

    Where `y` is the dependent endogenous variable, `x` are the explanatory
    variables and `z` are the instruments. Variables in `x` that are exogenous
    need also be included in `z`.

    Notation Warning: our name `exog` stands for the explanatory variables,
    and includes both exogenous and explanatory variables that are endogenous,
    i.e. included endogenous variables

    Parameters
    ----------
    endog : array_like
        dependent endogenous variable
    exog : array_like
        explanatory, right hand side variables, including explanatory variables
        that are endogenous
    instrument : array_like
        Instrumental variables, variables that are exogenous to the error
        in the linear model containing both included and excluded exogenous
        variables
    Nc                    ||                      d          }| j        | j        | j        }}}t	          j        |j        |          }t	          j        |j        |          }	|j                            |          }
|
                    |          }|
                    |	          }t          j                            |                              |          }|S )a  estimate parameters using GMM for linear model

        Uses closed form expression instead of nonlinear optimizers

        Parameters
        ----------
        start : not used
            starting values for minimization, not used, only for consistency
            of method signature
        weights : ndarray
            weighting matrix for moment conditions. If weights is None, then
            the identity matrix is used
        optim_method : not used,
            optimization method, not used, only for consistency of method
            signature
        **kwds : keyword arguments
            not used, will be silently ignored (for compatibility with generic)


        Returns
        -------
        paramest : ndarray
            estimated parameters

        NFr   )	r   r&   r   r   r   r4   r5   r6   rf   )r%   r   r   r   r   r@   r   rA   zTxzTypart0part1part2rI   s                 r   r   zLinearIVGMM.fitgmm  s    < ?((U(33G*dia1fQS!nnfQS!nn		'""		#		#&&**511r   c                 >    || j         }t          j        ||          S r)   rM   rN   s      r   rO   zLinearIVGMM.predict  s     <9DvdF###r   c                 h    | j         | j        }}t          j        |j        |           | j        z  }|S r)   )r   r   r   r4   r5   r   )r%   rI   r   r   rA   r,  s         r   r-  zLinearIVGMM.gradient_momcond  s0     y$/1F13NN?TY.r   c           	      ,   | j         | j        }}|j        d         }|                     |          }dt	          j        |j        |                              |                    t	          j        |j        |                              z  }|||z  z  }|S )Nr   )r   r   r    
get_errorsr   r4   r5   )	r%   rI   r   r   r   rA   r   r&  r   s	            r   r   zLinearIVGMM.score  s|     y$/1wqzOOF##RVAC^^''BF13NN(C(CDDDr   )NNr)   )rP   rQ   rR   rS   r   rO   r-  r   r/   r   r   rl  rl  v  se         <+ + + +\$ $ $ $      r   rl  c                   J     e Zd ZdZd Z fdZd
dZddZ	 	 ddZd	 Z	 xZ
S )NonlinearIVGMMa7  
    Class for non-linear instrumental variables estimation using GMM

    The model is assumed to have the following moment condition

        E[ z * (y - f(X, beta)] = 0

    Where `y` is the dependent endogenous variable, `x` are the explanatory
    variables and `z` are the instruments. Variables in `x` that are exogenous
    need also be included in z. `f` is a nonlinear function.

    Notation Warning: our name `exog` stands for the explanatory variables,
    and includes both exogenous and explanatory variables that are endogenous,
    i.e. included endogenous variables

    Parameters
    ----------
    endog : array_like
        dependent endogenous variable
    exog : array_like
        explanatory, right hand side variables, including explanatory variables
        that are endogenous.
    instruments : array_like
        Instrumental variables, variables that are exogenous to the error
        in the linear model containing both included and excluded exogenous
        variables
    func : callable
        function for the mean or conditional expectation of the endogenous
        variable. The function will be called with parameters and the array of
        explanatory, right hand side variables, `func(params, exog)`

    Notes
    -----
    This class uses numerical differences to obtain the derivative of the
    objective function. If the jacobian of the conditional mean function, `func`
    is available, then it can be used by subclassing this class and defining
    a method `jac_func`.

    TODO: check required signature of jac_error and jac_func
    c                 J    t          j        | j        j        d                   S )Nr   rc  r,   s    r   r   zNonlinearIVGMM.fitstart  re  r   c                 N    || _          t                      j        |||fi | d S r)   )funcr   r   )r%   r&   r   r   r|  r   r'   s         r   r   zNonlinearIVGMM.__init__  s3    	j99D99999r   Nc                 @    || j         }|                     ||          S r)   )r   r|  rN   s      r   rO   zNonlinearIVGMM.predict  s"    <9Dyy&&&r   Tc                 D    t          || j        | j        f||          }|S )Nr0  )r   r|  r   )r%   rI   r   r   r  r+  r1  s          r   jac_funczNonlinearIVGMM.jac_func!  s3     fdityl'/B B B r   c                 <    |                      ||d dd           }| S )NTr0  )r  )r%   rI   r   r   r  r+  r  s          r   	jac_errorzNonlinearIVGMM.jac_error*  s1     ==td)- ! / / yr   c                 X   | j         }|j        d         }|                     ||d d d          }| }|                     |          }dt	          j        t	          j        |j        |          |                              t	          j        |j        |                    z  }	|	||z  z  }	|	S )Nr   T)r   r+  r  rv  )r   r    r  r#  r   r4   r5   )
r%   rI   r   r   rA   r   jac_ur   r&  r   s
             r   r   zNonlinearIVGMM.score3  s     OwqzvwT4(,  . .FNN6""RVBF13NNG4488QHHHr   r)   )NTN)rP   rQ   rR   rS   r   r   rO   r  r  r   rT   rU   s   @r   ry  ry    s        ' 'X, , ,
: : : : :
' ' ' '    >B         r   ry  c                   T    e Zd ZdZed             Zed             Zed             ZdS )ra  zResults class of IVGMMc                 @    | j                             | j                  S )zFitted values)rX   rO   rI   r,   s    r   fittedvalueszIVGMMResults.fittedvaluesJ  s     z!!$+...r   c                 *    | j         j        | j        z
  S )	Residuals)rX   r&   r  r,   s    r   residzIVGMMResults.residP  s     z$"333r   c                 F    | j         | j         z                      d          S )zSum of square errorsr   )r  sumr,   s    r   re   zIVGMMResults.ssrV  s!     
TZ',,Q///r   N)rP   rQ   rR   rS   r   r  r  re   r/   r   r   ra  ra  F  sk           / / ^/
 4 4 ^4
 0 0 ^0 0 0r   ra  c                 p   || z
  }||z
  }|st           j                            |          }t           j                            |          }t          j        |t          j        ||                    }t
          j                            ||          }	t           j                            |          }
||	||
fS )a  Hausmans specification test

    Parameters
    ----------
    params_e : ndarray
        efficient and consistent under Null hypothesis,
        inconsistent under alternative hypothesis
    params_i : ndarray
        consistent under Null hypothesis,
        consistent under alternative hypothesis
    cov_params_e : ndarray, 2d
        covariance matrix of parameter estimates for params_e
    cov_params_i : ndarray, 2d
        covariance matrix of parameter estimates for params_i

    example instrumental variables OLS estimator is `e`, IV estimator is `i`


    Notes
    -----

    Todos,Issues
    - check dof calculations and verify for linear case
    - check one-sided hypothesis


    References
    ----------
    Greene section 5.5 p.82/83


    )	r   r6   rg   rf   r4   r   rh   ri   r   )params_eparams_icov_params_ecov_params_irj   rn   ro   rp   rq   rr   evalss              r   rs   rs   ^  s    B h&Kl*H  .i##H--9>>(++L
{BF<==>>A:==C  DIx((EdCr   c                   6     e Zd ZdZ fdZd Zd ZddZ xZS )	DistQuantilesGMMz
    Estimate distribution parameters by GMM based on matching quantiles

    Currently mainly to try out different requirements for GMM when we cannot
    calculate the optimal weighting matrix.

    c                    t                                          ||           d| _        |d         | _        | _        d|vrt          j        g d          x| _        }n|d         x| _        }t          j        fd|dz  D                       | _        t          | j                  | _
        | _        || _        || _        t          |           | _        d| _        d S )	Ngh㈵>distfnpquant)g{Gz?rt   g?g?g333333?g?gffffff?gGz?c                 :    g | ]}t          j        |          S r/   )r   scoreatpercentile)r   pr&   s     r   r   z-DistQuantilesGMM.__init__.<locals>.<listcomp>  s2      /  /  /a 7q A A  /  /  /r   d   )rX   r   )r   r   r   r  r&   r   arrayr  xquantr\   r   r   r   r   r   )r%   r&   r   r   r   r  r'   s    `    r   r   zDistQuantilesGMM.__init__  s    j111 8n 
 4#%8,R,R,R#S#SSDK&&#'>1DK& h  /  /  /  /#)#: /  /  / 0 0%%
 
	$!--- r   c                     | j         }t          |d          r|                    | j                  }ndg|j        z  ddgz   }t          j        |          S )N	_fitstartr   g        g      ?)r  hasattrr  r&   numargsr   r   )r%   r  r   s      r   r   zDistQuantilesGMM.fitstart  s[     6;'' 	1$$TZ00EEC&"R0Ez%   r   c                     t          |          dk    r|\  }}nt          |          dk    r|\  }}}n	 | j        | j        }} | j        j        |g|R  |z
  }t          j        |          S )a  moment conditions for estimating distribution parameters by matching
        quantiles, defines as many moment conditions as quantiles.

        Returns
        -------
        difference : ndarray
            difference between theoretical and empirical quantiles

        Notes
        -----
        This can be used for method of moments or for generalized method of
        moments.

        r     )r\   r  r  r  cdfr   
atleast_2d)r%   rI   locscaler    pqxqcdfdiffs           r   r	  zDistQuantilesGMM.momcond  s    " v;;!JC[[A &E3  dkB!$+/".v...3}W%%%r   NFc                    |t          j        | j                  }|                     |          }|| j        _        i | j        _        ddi| j        _        | j                            ||          }|| j        _	        | 
                    ||          | j        _        | j        j                            d|i           | j        S )a  fit without estimating an optimal weighting matrix and return results

        This is a convenience function that calls fitgmm and covparams with
        a given weight matrix or the identity weight matrix.
        This is useful if the optimal weight matrix is know (or is analytically
        given) or if an optimal weight matrix cannot be calculated.

        (Developer Notes: this function could go into GMM, but is needed in this
        class, at least at the moment.)

        Parameters
        ----------


        Returns
        -------
        results : GMMResult instance
            result instance with params and _cov_params attached

        See Also
        --------
        fitgmm
        cov_params

        N)r   r   r   )r   r   r   )r   r]   r   r   r   rI   r   r   rJ  r   r   r<  r   )r%   r   r   r   rI   r7  s         r   fitoncezDistQuantilesGMM.fitonce  s    4 ?fTZ((G5))$&6u%="l--g:M . O O  ' --fg>>"))+@AT*UVVV|r   )NNF)	rP   rQ   rR   rS   r   r   r	  r  rT   rU   s   @r   r  r    su         ! ! ! ! !<	! 	! 	!& & &B) ) ) ) ) ) ) )r   r  )r   ra  r  r)   )'rS   statsmodels.compat.pythonr   numpyr   scipyr   r   statsmodels.tools.numdiffr   statsmodels.base.modelr   r   r	   #statsmodels.regression.linear_modelr
   r   r   %statsmodels.stats.sandwich_covariancesandwich_covariancer   statsmodels.tools.decoratorsr   statsmodels.tools.toolsr   r   r   r   r;   _gmm_optionsr   r   r`  rl  ry  ra  rs   r  r   r/   r   r   <module>r     s  / /d - , , , , ,     ! ! ! ! ! ! ! ! 3 3 3 3 3 3M M M M M M M M M MK K K K K K K K K K 5 5 5 5 5 5 5 5 5 7 7 7 7 7 7 . . . . . .	  n$ n$ n$ n$ n$_ n$ n$ n$bj j j j j+ j j jb6py
 y
 y
 y
 y
% y
 y
 y
zb b b b b' b b bL,< ,< ,< ,< ,<C ,< ,< ,<^g g g g g% g g gVb b b b bU b b bJ0 0 0 0 0: 0 0 00. . . .j| | | | |s | | |~ %/&2*46 6   r   