
    M/Ph4                     r    d dl Zd dlZd dlmZ  G d d          Z G d d          Z G d de          ZdS )	    N)LikelihoodModelResultsc                   2    e Zd ZdZd	dZd Zd Zd Zd ZdS )
BayesGaussMIa  
    Bayesian Imputation using a Gaussian model.

    The approach is Bayesian.  The goal is to sample from the joint
    distribution of the mean vector, covariance matrix, and missing
    data values given the observed data values.  Conjugate priors for
    the population mean and covariance matrix are used.  Gibbs
    sampling is used to update the mean vector, covariance matrix, and
    missing data values in turn.  After burn-in, the imputed complete
    data sets from the Gibbs chain can be used in multiple imputation
    analyses (MI).

    Parameters
    ----------
    data : ndarray
        The array of data to be imputed.  Values in the array equal to
        NaN are imputed.
    mean_prior : ndarray, optional
        The covariance matrix of the Gaussian prior distribution for
        the mean vector.  If not provided, the identity matrix is
        used.
    cov_prior : ndarray, optional
        The center matrix for the inverse Wishart prior distribution
        for the covariance matrix.  If not provided, the identity
        matrix is used.
    cov_prior_df : positive float
        The degrees of freedom of the inverse Wishart prior
        distribution for the covariance matrix.  Defaults to 1.

    Examples
    --------
    A basic example with OLS. Data is generated assuming 10% is missing at
    random.

    >>> import numpy as np
    >>> x = np.random.standard_normal((1000, 2))
    >>> x.flat[np.random.sample(2000) < 0.1] = np.nan

    The imputer is used with ``MI``.

    >>> import statsmodels.api as sm
    >>> def model_args_fn(x):
    ...     # Return endog, exog from x
    ...    return x[:, 0], x[:, 1:]
    >>> imp = sm.BayesGaussMI(x)
    >>> mi = sm.MI(imp, sm.OLS, model_args_fn)
    N   c                    d | _         t          |          t          j        u r|j        | _         t          j        |d          }|| _        || _        t          j	        |          | _
        | j
        j        d         | _        | j
        j        d         | _        dt          j        dt          j        | j
        j        d                   z             z   }t          j        | j
        |          }i }t#          |          D ]0\  }}	|	dk    r|	|vrg ||	<   ||	                             |           1d |                                D             | _        | j        j        d         }
t          j        |
          | _        g }t/          |
          D ]{}| j        d d |f         }	|	t          j        |	                   }	t3          |	          dk    rd|z  }t5          |          |                    |	                                           |t          j        |          | _        |t          j        |
          }|| _        |t          j        |
          }|| _        || _        d S )NW)requirementsr   r   c                 6    g | ]}t          j        |          S  )npasarray).0vs     _/var/www/html/test/jupyter/venv/lib/python3.11/site-packages/statsmodels/imputation/bayes_mi.py
<listcomp>z)BayesGaussMI.__init__.<locals>.<listcomp>O   s     @@@1A@@@    z Column %d has no observed values) 
exog_namestypepd	DataFramecolumnsr   requiredata_dataisnanmaskshapenobsnvarlogarangedot	enumerateappendvaluespatternseyecovrangeisfinitelen
ValueErrormeanr   
mean_prior	cov_priorcov_prior_df)selfr   r.   r/   r0   zcrowmapir   pr-   msgs                r   __init__zBayesGaussMI.__init__7   s&   ::%%"lDOz$S111	
HTNN	IOA&	IOA&	 q29TY_Q%7888999F49a  aLL 	  	 DAqAvvq	1IQ@@@@@ JQ6!99q 	" 	"A
111a4 A"+a..!A1vv{{81< oo%KK!!!!Jt$$	 J$ q		I" )r   c                 ~    |                                   |                                  |                                  dS )z2
        Cycle through all Gibbs updates.
        N)update_dataupdate_mean
update_cov)r1   s    r   updatezBayesGaussMI.updatek   s@    
 	 	r   c           	      `   | j         D ]}|d         }t          j        | j        |ddf                   }t          j        | j        |ddf                    }| j        |         }| j        |         }| j        |ddf         dd|f         }| j        |ddf         dd|f         }| j        |ddf         dd|f         }	| j        |ddf         dd|f         |z
  }
|t          j        |	t          j        	                    ||
j
                            j
        z   }|t          j        |	t          j        	                    ||	j
                            z
  }t          j                            |          }t          j                            t          |          t          |          f          }|t          j        ||j
                  z   | j        t          j        ||          <   | j        (t#          j        | j        | j        d          | _        dS | j        | _        dS )z:
        Gibbs update of the missing data values.
        r   NsizeF)r   copy)r&   r   flatnonzeror   r-   r(   r   r"   linalgsolveTcholeskyrandomnormalr+   ix_r   r   r   r   )r1   ixr5   ix_missix_obsmmmovoovmmvmorcmcvcsus                  r   r:   zBayesGaussMI.update_datav   s   
 - 	C 	CB1AnTYq!!!t_55G^TYq!!!t_$455F7#B6"B(61119%aaai0C(7AAA:&qqq'z2C(7AAA:&qqq&y1C
2qqq5!!!!V),r1AbfS")//#qs";";<<>>Brvc29??3#>#>???B##B''B	  s2wwG&= >>A.026!RT??.BDJrvb'**++ ?&:#'? %' ' 'DIII
 
DIIIr   c           	      "   t           j                            | j        | j        z  | j        z   | j        | j        z            }t          j        | j        |          }t           j                            | j        | j                            d                    }t          j        ||          }t           j        	                    |          }|t          j        |t           j
                            dd| j                            z   | _        dS )zo
        Gibbs update of the mean vector.

        Do not call until update_data has been called once.
        r   r   N)r   rC   rD   r(   r   r.   r"   r   sumrF   rG   rH   r   r-   )r1   rS   vmrR   s       r   r;   zBayesGaussMI.update_mean   s     Y__TXdi/$/A!_ty8: :VDHb!! Y__TXtz~~a'8'899VB^^ Ir""29#3#3Aq$)#D#DEEE			r   c                 :   | j         | j        z
  }t          j        |j        |          }|| j        z   }t          t          j        | j        | j	        z                       }t          j
                            t          j
                            |                    }t          j        t          j                            || j        f          |j                  }t          j        |j        |          }t          j
                            |          | _        dS )zu
        Gibbs update of the covariance matrix.

        Do not call until update_data has been called once.
        r?   N)r   r-   r   r"   rE   r/   intceilr   r0   rC   rF   invrG   rH   r   r(   )r1   rR   gradfxmas          r   r<   zBayesGaussMI.update_cov   s     J"VAC^^T%667788Iry}}Q//00F29##"di#9913??VAC^^9==$$r   )NNr   )	__name__
__module____qualname____doc__r8   r=   r:   r;   r<   r   r   r   r   r      st        . .`2) 2) 2) 2)h	 	 	!# !# !#FF F F*% % % % %r   r   c                   .    e Zd ZdZ	 	 	 d	dZd
dZd ZdS )MIa{  
    MI performs multiple imputation using a provided imputer object.

    Parameters
    ----------
    imp : object
        An imputer class, such as BayesGaussMI.
    model : model class
        Any statsmodels model class.
    model_args_fn : function
        A function taking an imputed dataset as input and returning
        endog, exog.  If the model is fit using a formula, returns
        a DataFrame used to build the model.  Optional when a formula
        is used.
    model_kwds_fn : function, optional
        A function taking an imputed dataset as input and returning
        a dictionary of model keyword arguments.
    formula : str, optional
        If provided, the model is constructed using the `from_formula`
        class method, otherwise the `__init__` method is used.
    fit_args : list-like, optional
        List of arguments to be passed to the fit method
    fit_kwds : dict-like, optional
        Keyword arguments to be passed to the fit method
    xfunc : function mapping ndarray to ndarray
        A function that is applied to the complete data matrix
        prior to fitting the model
    burn : int
        Number of burn-in iterations
    nrep : int
        Number of imputed data sets to use in the analysis
    skip : int
        Number of Gibbs iterations to skip between successive
        multiple imputation fits.

    Notes
    -----
    The imputer object must have an 'update' method, and a 'data'
    attribute that contains the current imputed dataset.

    xfunc can be used to introduce domain constraints, e.g. when
    imputing binary data the imputed continuous values can be rounded
    to 0/1.
    Nd      
   c                 $   || _         || _        || _        || _        |d }|}|| _        |d }|}|| _        |d }|}|| _        |d }|}|| _        || _        |
| _	        || _        t          |	          D ]}|                                 d S )Nc                     g S Nr   ra   s    r   fzMI.__init__.<locals>.f       	r   c                     i S rn   r   ro   s    r   rp   zMI.__init__.<locals>.f  rq   r   c                     g S rn   r   ro   s    r   rp   zMI.__init__.<locals>.f  rq   r   c                     i S rn   r   ro   s    r   rp   zMI.__init__.<locals>.f  rq   r   )impskipmodelformulamodel_args_fnmodel_kwds_fnfit_argsfit_kwdsxfuncnrepr)   r=   )r1   ru   rw   ry   rz   rx   r{   r|   r}   burnr~   rv   rp   ks                 r   r8   zMI.__init__   s    
  	 
   M*   M*  H   H 
		 t 	 	AJJLLLL	 	r   c                    g g }}g }t          | j                  D ]}t          | j        dz             D ]}| j                                         | j        j        }| j        |                     |          }| j        4 | j        | 	                    |          i | 
                    |          }nA | j        j        | j        g| 	                    |          R i | 
                    |          } |j        |                     |          i |                     |          }||                     ||                     |                    t!          j        |j                                                             |                    t!          j        |                                                                                     |                     ||          \  }	}
}t-          | ||	|
          }||_        ||_        |S )a  
        Impute datasets, fit models, and pool results.

        Parameters
        ----------
        results_cb : function, optional
            If provided, each results instance r is passed through `results_cb`,
            then appended to the `results` attribute of the MIResults object.
            To save complete results, use `results_cb=lambda x: x`.  The default
            behavior is to save no results.

        Returns
        -------
        A MIResults object.
        r   )r)   r~   rv   ru   r=   r   r}   rx   rw   ry   rz   from_formulafitr{   r|   r$   r   r   paramsrA   
cov_params_combine	MIResultsfmiresults)r1   
results_cbparr(   all_resultsr   darw   resultr   r   r   rR   s                r   r   zMI.fit  s   " rSty!! 	? 	?A49Q;'' " "!!!!Bz%ZZ^^|#"
D$6$6r$:$: =%)%7%7%;%;= = 0
/,4)-););B)?)?4 4 4 ..r224 4 UYb 1 1GT]]25F5FGGF%""::f#5#5666JJrz&-"4"4"6"677888JJrz&"3"3"5"5":":"<"<==>>>>"&--S"9"9
CdE6:66	r   c                    t          j        |          }|j        d         }|                    d          }t	          |          t          |          z  }t          j        |j                  }t          j        |          }|ddt          |          z  z   |z  z   }ddt          |          z  z   t          j
        |          z  t          j
        |          z  }|||fS )Nr   r   )r   r   r   r-   rX   r+   r(   rE   
atleast_2dfloatdiag)	r1   r   r(   mr   wcovbcovcovpr   s	            r   r   zMI._combineU  s     joo IaL ! 3xx#c((" vce}}}T"" q1U1XX:~t++ 1U1XX:~.>tS  r   )	NNNNNNri   rj   rk   rn   )rc   rd   re   rf   r8   r   r   r   r   r   rh   rh      se        + +Z FJCG)+- - - -^5 5 5 5n! ! ! ! !r   rh   c                   *     e Zd ZdZ fdZddZ xZS )r   a;  
    A results class for multiple imputation (MI).

    Parameters
    ----------
    mi : MI instance
        The MI object that produced the results
    model : instance of statsmodels model class
        This can be any instance from the multiple imputation runs.
        It is used to get class information, the specific parameter
        and data values are not used.
    params : array_like
        The overall multiple imputation parameter estimates.
    normalized_cov_params : array_like (2d)
        The overall variance covariance matrix of the estimates.
    c                 j    t                                          |||           || _        || _        d S rn   )superr8   mi_model)r1   r   rw   r   normalized_cov_params	__class__s        r   r8   zMIResults.__init__  s2    (=>>>r   N皙?c                    ddl m} |                                }d}i }d|d<   | j        j        j        |d<   | j        j        |d<   d| j        j        j	        j
        d         z  |d	<   d| j        j        z  |d
<   |                    |d|           |                    | |          }| j        |d<   |                    ||           |                    ||            |S )a  
        Summarize the results of running multiple imputation.

        Parameters
        ----------
        title : str, optional
            Title for the top table. If not None, then this replaces
            the default title
        alpha : float
            Significance level for the confidence intervals

        Returns
        -------
        smry : Summary instance
            This holds the summary tables and text, which can be
            printed or converted to various output formats.
        r   )summary2z%8.3frh   zMethod:zModel:zDependent variable:z%dzSample size:zNum. imputationsl)alignfloat_format)alphaFMI)r   )titler   )statsmodels.iolibr   Summaryr   rw   rc   r   endog_namesru   r   r   r~   add_dictsummary_paramsr   add_df	add_title)r1   r   r   r   smryr   infoparams           r   summaryzMIResults.summary  s    & 	/.....!!Y/X&*k&="##dgk&6&<Q&??^#'$',#6 d#LAAA''E'::xeE555UD111r   )Nr   )rc   rd   re   rf   r8   r   __classcell__)r   s   @r   r   r   p  sV         "    ' ' ' ' ' ' ' 'r   r   )	numpyr   pandasr   statsmodels.base.modelr   r   rh   r   r   r   r   <module>r      s            9 9 9 9 9 9x% x% x% x% x% x% x% x%vl! l! l! l! l! l! l! l!^? ? ? ? ?& ? ? ? ? ?r   