
    M/PhC                         d Z ddlmZ ddlZddlmZ ddlmZm	Z	m
Z
 ddlmZ  G d de	          Z G d	 d
e
          ZddZddZd ZddZdS )a  linear model with Theil prior probabilistic restrictions, generalized Ridge

Created on Tue Dec 20 00:10:10 2011

Author: Josef Perktold
License: BSD-3

open issues
* selection of smoothing factor, strength of prior, cross validation
* GLS, does this really work this way
* None of inherited results have been checked yet,
  I'm not sure if any need to be adjusted or if only interpretation changes
  One question is which results are based on likelihood (residuals) and which
  are based on "posterior" as for example bse and cov_params

* helper functions to construct priors?
* increasing penalization for ordered regressors, e.g. polynomials

* compare with random/mixed effects/coefficient, like estimated priors



there is something fishy with the result instance, some things, e.g.
normalized_cov_params, do not look like they update correctly as we
search over lambda -> some stale state again ?

I added df_model to result class using the hatmatrix, but df_model is defined
in model instance not in result instance. -> not clear where refactoring should
occur. df_resid does not get updated correctly.
problem with definition of df_model, it has 1 subtracted for constant



    )lrangeN)cache_readonly)OLSGLSRegressionResults)atleast_2dcolsc                   8     e Zd ZdZ	 	 d
 fd	ZddZdd	Z xZS )TheilGLSa  GLS with stochastic restrictions

    TheilGLS estimates the following linear model

    .. math:: y = X \beta + u

    using additional information given by a stochastic constraint

    .. math:: q = R \beta + v

    :math:`E(u) = 0`, :math:`cov(u) = \Sigma`
    :math:`cov(u, v) = \Sigma_p`, with full rank.

    u and v are assumed to be independent of each other.
    If :math:`E(v) = 0`, then the estimator is unbiased.

    Note: The explanatory variables are not rescaled, the parameter estimates
    not scale equivariant and fitted values are not scale invariant since
    scaling changes the relative penalization weights (for given \Sigma_p).

    Note: GLS is not tested yet, only Sigma is identity is tested

    Notes
    -----

    The parameter estimates solves the moment equation:

    .. math:: (X' \Sigma X + \lambda R' \sigma^2 \Sigma_p^{-1} R) b = X' \Sigma y + \lambda R' \Sigma_p^{-1} q

    :math:`\lambda` is the penalization weight similar to Ridge regression.

    If lambda is zero, then the parameter estimate is the same as OLS. If
    lambda goes to infinity, then the restriction is imposed with equality.
    In the model `pen_weight` is used as name instead of $\lambda$

    R does not have to be square. The number of rows of R can be smaller
    than the number of parameters. In this case not all linear combination
    of parameters are penalized.

    The stochastic constraint can be interpreted in several different ways:

     - The prior information represents parameter estimates from independent
       prior samples.
     - We can consider it just as linear restrictions that we do not want
       to impose without uncertainty.
     - With a full rank square restriction matrix R, the parameter estimate
       is the same as a Bayesian posterior mean for the case of an informative
       normal prior, normal likelihood and known error variance Sigma. If R
       is less than full rank, then it defines a partial prior.

    References
    ----------
    Theil Goldberger

    Baum, Christopher slides for tgmixed in Stata

    (I do not remember what I used when I first wrote the code.)

    Parameters
    ----------
    endog : array_like, 1-D
        dependent or endogenous variable
    exog : array_like, 1D or 2D
        array of explanatory or exogenous variables
    r_matrix : None or array_like, 2D
        array of linear restrictions for stochastic constraint.
        default is identity matrix that does not penalize constant, if constant
        is detected to be in `exog`.
    q_matrix : None or array_like
        mean of the linear restrictions. If None, the it is set to zeros.
    sigma_prior : None or array_like
        A fully specified sigma_prior is a square matrix with the same number
        of rows and columns as there are constraints (number of rows of r_matrix).
        If sigma_prior is None, a scalar or one-dimensional, then a diagonal matrix
        is created.
    sigma : None or array_like
        Sigma is the covariance matrix of the error term that is used in the same
        way as in GLS.
    Nc                     t                                          |||           |t          j        |          }n]	 | j        j        }n# t          $ r d }Y nw xY w|j        d         }t          j        |          }|t          |          }	|	|= ||	         }|j        \  }
}|| _
        || j        j        d         k    rt          d          |t          |          | _        n#t          j        |
          d d d f         | _        | j        j        |
dfk    rt          d          |vt          j        |          }t          j        |          dk    r*t          j        |t          j        |
          z            }n4|j        dk    rt          j        |          }nt          j        |
          }|j        |
|
fk    rt          d          || _        t          j                            |          | _        d S )Nsigma   z8r_matrix needs to have the same number of columnsas exogzq_matrix has wrong shapezsigma_prior has wrong shape)super__init__npasarraydata	const_idxAttributeErrorshapeeyer   r_matrixexog
ValueErrorr   q_matrixzerossizediagonesndimsigma_priorlinalgpinvsigma_prior_inv)selfendogr   r   r   r!   r   r   k_exogkeep_idxk_constraints	__class__s              h/var/www/html/test/jupyter/venv/lib/python3.11/site-packages/statsmodels/sandbox/regression/penalized.pyr   zTheilGLS.__init__|   s   E222z(++HH! I/		! ! ! ! 			! Z]Fvf~~H$!&>>Y'#H- (v TY_Q''' ' ( ( ( *844DMMH]33AAAtG<DM==!"4447888"*[11Kw{##q(( gkBGM4J4J&JKK!Q&& gk22&//K >>>:;;;&!y~~k::s   A AA      ?sandwichTc           	         |}t          | j        | j        | j                                                  }|| _        |j        }| j        }| j        }| j	        }	| j
        }
| j        dddf         }t          j        |
j        |
          }|||z  t          j        |j        t          j        |	|                    z  z   }t          j        |
j        |          ||z  t          j        |j        t          j        |	|                    z  z   }t          j                            |d          }|                    |                              |          }t          j        ||          }t          j        |          }|dk    r|}n|dk    r|}nt%          d          || _        || _        || _        t-          | |||          }||_        |S )	a  Estimate parameters and return results instance

        Parameters
        ----------
        pen_weight : float
            penalization factor for the restriction, default is 1.
        cov_type : str, 'data-prior' or 'sandwich'
            'data-prior' assumes that the stochastic restriction reflects a
            previous sample. The covariance matrix of the parameter estimate
            is in this case the same form as the one of GLS.
            The covariance matrix for cov_type='sandwich' treats the stochastic
            restriction (R and q) as fixed and has a sandwich form analogously
            to M-estimators.

        Returns
        -------
        results : TheilRegressionResults instance

        Notes
        -----
        cov_params for cov_type data-prior, is calculated as

        .. math:: \sigma^2 A^{-1}

        cov_params for cov_type sandwich, is calculated as

        .. math:: \sigma^2 A^{-1} (X'X) A^{-1}

        where :math:`A = X' \Sigma X + \lambda \sigma^2 R' \Simga_p^{-1} R`

        :math:`\sigma^2` is an estimate of the error variance.
        :math:`\sigma^2` inside A is replaced by the estimate from the initial
        GLS estimate. :math:`\sigma^2` in cov_params is obtained from the
        residuals of the final estimate.

        The sandwich form of the covariance estimator is not robust to
        misspecified heteroscedasticity or autocorrelation.
        r   NgKH9)rcondr-   z
data-priorz-cov_type has to be 'sandwich' or 'data-prior')normalized_cov_paramsuse_t)r   r&   r   r   fitres_gls	mse_residr   r   r$   wexogwendogr   dotTr"   r#   squeezer   r0   xpxisigma2_eTheilRegressionResultspenalization_factor)r%   
pen_weightcov_typer1   lambdr3   r;   r   r   r$   xyxxxpxxpyr:   xpxi_sandwichparamsr0   lfits                       r+   r2   zTheilGLS.fit   s   N dj$)4:>>>BBDD$==.JK$
 VAC^^
BF?H4U4U!V!VVWfQS!nn
BF?H4U4U!V!VVW y~~c~22((..c""F##z!!$1!!%%$(!!LMMM%2"	 %dF-B%Q Q Q $)     aiccc                 H     |i } fd}ddl m}  |j        ||fi |}|S )a\  find penalization factor that minimizes gcv or an information criterion

        Parameters
        ----------
        method : str
            the name of an attribute of the results class. Currently the following
            are available aic, aicc, bic, gc and gcv.
        start_params : float
            starting values for the minimization to find the penalization factor
            `lambd`. Not since there can be local minima, it is best to try
            different starting values.
        optim_args : None or dict
            optimization keyword arguments used with `scipy.optimize.fmin`

        Returns
        -------
        min_pen_weight : float
            The penalization factor at which the target criterion is (locally)
            minimized.

        Notes
        -----
        This uses `scipy.optimize.fmin` as optimizer.
        Nc                 J    t                              |                     S )N)getattrr2   )r@   methodr%   s    r+   get_icz*TheilGLS.select_pen_weight.<locals>.get_ic  s     488E??F333rI   r   )optimize)scipyrP   fmin)r%   rN   start_params
optim_argsrO   rP   r@   s   ``     r+   select_pen_weightzTheilGLS.select_pen_weight   sc    2 J	4 	4 	4 	4 	4 	4
 	#"""""flAAjAArI   )NNNN)r,   r-   T)rJ   r,   N)__name__
__module____qualname____doc__r   r2   rU   __classcell__r*   s   @r+   r
   r
   +   s        N N` =A)-.; .; .; .; .; .;`P P P Pd' ' ' ' ' ' ' 'rI   r
   c                        e Zd Z fdZed             Zd Zed             Zed             Zed             Z	d Z
d Z xZS )	r<   c                      t                      j        |i | |                                 dz
  | _        | j        j        j        d         | j        z
  dz
  | _        d S )Nr   r   )r   r   hatmatrix_tracedf_modelmodelr&   r   df_resid)r%   argskwdsr*   s      r+   r   zTheilRegressionResults.__init__0  s]    $'$''' ,,..2
(.q1DMAAErI   c                     | j         j        }| j         j        t          j        || j         j        j                  j        z                      d          S )a  diagonal of hat matrix

        diag(X' xpxi X)

        where xpxi = (X'X + sigma2_e * lambd * sigma_prior)^{-1}

        Notes
        -----

        uses wexog, so this includes weights or sigma - check this case

        not clear whether I need to multiply by sigmahalf, i.e.

        (W^{-0.5} X) (X' W X)^{-1} (W^{-0.5} X)'  or
        (W X) (X' W X)^{-1} (W X)'

        projection y_hat = H y    or in terms of transformed variables (W^{-0.5} y)

        might be wrong for WLS and GLS case
        r   )r`   r0   r5   r   r7   r8   sum)r%   r:   s     r+   hatmatrix_diagz%TheilRegressionResults.hatmatrix_diag7  sE    . z/ 
 26$
0@0B#C#C#EEJJ1MMMrI   c                 4    | j                                         S )ztrace of hat matrix
        )rf   re   r%   s    r+   r^   z&TheilRegressionResults.hatmatrix_traceT  s     "&&(((rI   c                 V    | j         d|                                 | j        z  z
  dz  z  S Nr,      )r4   r^   nobsrh   s    r+   gcvzTheilRegressionResults.gcv`  s,    ~d&:&:&<&<ty&H!H1 LLLrI   c                 `    | j         d| j        z
  z  dz                                  | j        z  S rj   )residrf   re   rl   rh   s    r+   cvzTheilRegressionResults.cvd  s/    rD$7781<AACCdiOOrI   c                     t          j        | j                  dz   }| j        |                                 z
  dz
  }|dk    rdd|                                 z   z  |z  }nt           j        }||z   S )Nr   rk   r   r,   )r   logr4   rl   r^   inf)r%   aiceff_dofadjs       r+   rJ   zTheilRegressionResults.aicch  sq    fT^$$q()d22444q8Q;;rD002223g=CC&CSyrI   c                 <   t          | j        j        | j        j                                                  }| j        j        }| j        j        |                    |j                  dddf         z
  }|	                    |          }|j
                            t          j                            || j        j        z   |                    }ddlm} t          j                            | j        j                  }|j                            ||          }|||fS )zFHypothesis test for the compatibility of prior mean with data
        N)r   r   )stats)r   r`   r&   r   r2   r   r   r7   rG   
cov_paramsr8   r   r"   solver!   rQ   rx   matrix_rankchi2sf)	r%   res_olsr_matr_diff	ols_cov_r	statisticrx   dfpvalues	            r+   test_compatibilityz)TheilRegressionResults.test_compatibilityr  s     dj&
88<<>>
#$uyy'@'@4'HH&&&66	HLLTZ=S1SU[!\!\]]	Y""4:#9::y"--&"$$rI   c                 0    | j         dz   | j        j        z  S )a  a measure for the fraction of the data in the estimation result

        The share of the prior information is `1 - share_data`.

        Returns
        -------
        share : float between 0 and 1
            share of data defined as the ration between effective degrees of
            freedom of the model and the number (TODO should be rank) of the
            explanatory variables.
        r   )r_   r`   rankrh   s    r+   
share_dataz!TheilRegressionResults.share_data  s      !TZ_44rI   )rV   rW   rX   r   r   rf   r^   rm   rp   rJ   r   r   rZ   r[   s   @r+   r<   r<   .  s        F F F F F N N ^N8) ) ) M M ^M P P ^P   ^% % %$5 5 5 5 5 5 5rI   r<   c                     t          j        |           d| z  z
  }||S t          j        | |f          }||d d ||| z   f<   |S )Nr,   )r   r   r   )n_coeffsn_varspositionreducedfulls        r+   coef_restriction_meandiffr     sZ    fXH,G~x6*++.5QQQ(***+rI   c                     t          j        |            }d|d d |f<   t          |           }||= t          j        ||d          }||S t          j        | dz
  |f          }||d d ||| z   f<   |S )Nr   r   )axis)r   r   r   taker   )r   r   r   base_idxr   keepr   s          r+   coef_restriction_diffbaser     s    vhGGAAAxK(DXggt!,,,G~x!V,--.5QQQ(***+rI   c                     | d| dz  z
  z   S )Nr   rk    )ds    r+   next_oddr     s    AE	?rI   r   c           	         |dk    rddg}d}n5|dk    r/ddl m} t          |dz             }|                    ||          }t	          j        |t	          j        | t          |          z
            f          }ddl m}	 |		                    |t	          j        | t          |          z
  dz                       j
        }
||
S t	          j        | dz
  |f          }|
|d d ||| z   f<   |S )Nr   rk   r   )misc)ndiv)r"   )rQ   r   r   central_diff_weightsr   concatenater   lenr"   toeplitzr8   )r   degreer   r   r   diff_coeffsn_pointsr   dffr"   r   r   s               r+   coef_restriction_diffseqr     s    {{1g	!FQJ''//v/FF
.+rx3{;K;K0K'L'LM
N
NCooc28Hs;7G7G,G!,K#L#LMMOG ~x!V,--.5QQQ(***+rI   )Nr   )Nr   r   )r   Nr   r   )rY   statsmodels.compat.pythonr   numpyr   statsmodels.tools.decoratorsr   #statsmodels.regression.linear_modelr   r   r   #statsmodels.regression.feasible_glsr   r
   r<   r   r   r   r   r   rI   r+   <module>r      s.  ! !D - , , , , ,     7 7 7 7 7 7 K K K K K K K K K K > > > > > >z z z z zs z z zFf5 f5 f5 f5 f5. f5 f5 f5V      "       rI   