
    M/Ph7@                        d dl Zd dlmZ d dlmc mZ d dlmZ d dl	m
Z
 d dlmZ d dlmZ d dlmc mZ d dlmc mZ d dlmZ  G d d	          Z G d
 de          Z G d de          Z G d de          Z G d dej                  Z G d dej                  Z G d dej                  Z  ej!        e e           dS )    N)defaultdict)families)GLM)links)varfuncs)cache_readonlyc                       e Zd ZdZd ZdS )QIFCovarianceay  
    A covariance model for quadratic inference function regression.

    The mat method returns a basis matrix B such that the inverse
    of the working covariance lies in the linear span of the
    basis matrices.

    Subclasses should set the number of basis matrices `num_terms`,
    so that `mat(d, j)` for j=0, ..., num_terms-1 gives the basis
    of dimension d.`
    c                     t           )zX
        Returns the term'th basis matrix, which is a dim x dim
        matrix.
        )NotImplementedErrorselfdimterms      V/var/www/html/test/jupyter/venv/lib/python3.11/site-packages/statsmodels/genmod/qif.pymatzQIFCovariance.mat   s
    
 "!    N)__name__
__module____qualname____doc__r    r   r   r
   r
      s-        
 
" " " " "r   r
   c                       e Zd ZdZd Zd ZdS )QIFIndependenceaQ  
    Independent working covariance for QIF regression.  This covariance
    model gives identical results to GEE with the independence working
    covariance.  When using QIFIndependence as the working covariance,
    the QIF value will be zero, and cannot be used for chi^2 testing, or
    for model selection using AIC, BIC, etc.
    c                     d| _         d S )N   	num_termsr   s    r   __init__zQIFIndependence.__init__+       r   c                 :    |dk    rt          j        |          S d S )Nr   )npeyer   s      r   r   zQIFIndependence.mat.   s    1996#;;4r   Nr   r   r   r   r    r   r   r   r   r   r   "   s<               r   r   c                       e Zd ZdZd Zd ZdS )QIFExchangeablez=
    Exchangeable working covariance for QIF regression.
    c                     d| _         d S )N   r   r   s    r   r    zQIFExchangeable.__init__:   r!   r   c                 r    |dk    rt          j        |          S |dk    rt          j        ||f          S d S )Nr   r   )r#   r$   onesr   s      r   r   zQIFExchangeable.mat=   s;    1996#;;QYY7C:&&&4r   Nr%   r   r   r   r'   r'   5   s<               r   r'   c                       e Zd ZdZd Zd ZdS )QIFAutoregressivez?
    Autoregressive working covariance for QIF regression.
    c                     d| _         d S )N   r   r   s    r   r    zQIFAutoregressive.__init__K   r!   r   c                 8   |dk     rd}t          |          |dk    rt          j        |          S |dk    r2t          j        ||f          }d|j        dd |dz   <   ||j        z  }|S |dk    r*t          j        ||f          }d|d<   d||dz
  |dz
  f<   |S d S )Nr/   z?Groups must have size at least 3 for autoregressive covariance.r   r   r)   r   r   )
ValueErrorr#   r$   zerosflatT)r   r   r   msgr   s        r   r   zQIFAutoregressive.matN   s    770CS//!1996#;;QYY(C:&&C#$CHQZQZ 35LCJQYY(C:&&CCI !CAs1uJ4r   Nr%   r   r   r   r-   r-   F   s<               r   r-   c                   b     e Zd ZdZ	 	 d fd	Zd Zd Zd Zed fd	            Z		 	 ddZ
 xZS )QIFa  
    Fit a regression model using quadratic inference functions (QIF).

    QIF is an alternative to GEE that can be more efficient, and that
    offers different approaches for model selection and inference.

    Parameters
    ----------
    endog : array_like
        The dependent variables of the regression.
    exog : array_like
        The independent variables of the regression.
    groups : array_like
        Labels indicating which group each observation belongs to.
        Observations in different groups should be independent.
    family : genmod family
        An instance of a GLM family.
    cov_struct : QIFCovariance instance
        An instance of a QIFCovariance.

    References
    ----------
    A. Qu, B. Lindsay, B. Li (2000).  Improving Generalized Estimating
    Equations using Quadratic Inference Functions, Biometrika 87:4.
    www.jstor.org/stable/2673612
    Nnonec                   
 |t          j                    }n.t          |j        t           j                  st          d          || _        t          t                    | _	        |t                      }n$t          |t                    st          d          || _        t          j        |          } t!                      j        ||f||d| t          t%          |                    | _        t)          | j                  | _        t          t                    
t/          |          D ] \  }}	
|	                             |           !
fd| j        D             | _        |                     |           d S )Nz.QIF: `family` must be a genmod family instancez2QIF: `cov_struct` must be a QIFCovariance instance)groupsmissingc                      g | ]
}|         S r   r   ).0na	groups_ixs     r   
<listcomp>z QIF.__init__.<locals>.<listcomp>   s    CCCB)B-CCCr   )r   Gaussian
issubclass	__class__Familyr2   familyr   list_fit_historyr   
isinstancer
   
cov_structr#   asarraysuperr    setgroup_nameslenendognobs	enumerateappendr@   _check_args)r   rP   exogr;   rF   rJ   r<   kwargsigr@   rD   s             @r   r    zQIF.__init__   s    >&((FFf.@@ 4  "3 4 4 4'-- (**JJj-88 J HJ J J$F##4	
 &	
 	
;A	
 	
 	
  F,,
OO	%%	f%% 	# 	#DAqaL""""CCCC$2BCCC     r   c                     t          |          t          | j                  k    rd}t          |          t          | j                  | j        j        d         k    rd}t          |          d S )Nz1QIF: groups and endog should have the same lengthr   zGQIF: the length of endog should be equal to the number of rows of exog.)rO   rP   r2   rU   shape)r   r;   r6   s      r   rT   zQIF._check_args   sc    v;;#dj//))ECS//!tz??dioa000-CS//! 10r   c                 
   | j         }| j        }t          j        ||          }| j        j                            |          }| j                            |          }| j        j                            |          }| j        j        	                    |          }| j        j        
                    |          }	| j        j        }
|j        d         }||
z  }t          j        |          }t          j        |          }t          j        ||f          }t          j        ||f          }dg|z  }t          j        ||f          }| j        j        t          j        u }t#          | j        j        t$          j        t$          j        f          }| j        D ]}t          j        ||                   }||         ||         z
  }||z  }||ddf         ||df         z  }d}t/          |
          D ]}| j                            t3          |          |          }t          j        ||          |z  }t          j        |j        |          ||||z   <   t          j        || |dddf         z            |dddf         z  }t          j        |j        |          ||||z   ddf<   |r|st/          |          D ]}t          j        ||ddf         j        ||         |||f         z  |z            } |sd|	|         z  |dd|f         z  ||         dz  z  }!t          j        |j        |!t          j        ||          z            }"t          j        |j        t          j        ||!|z            |z            }#nd\  }"}#||||z   |fxx         | |"z   |#z   z  cc<   ||z  }t/          |          D ]9}t          j        ||dd|f                   }$||xx         |$|$j        z   z  cc<   :||z  }||z  }|t          j        ||          z  }t3          | j                  }%||%z  }||%z  }||%dz  z  }t          j        |t          j                            ||                    }&t          j        |          }'t/          |          D ]}||xx         t3          | j                  dz  z  cc<   t          j                            |||                   j        }$t          j                            ||$          }$t          j        |t          j        |$|                    |'|<   dt          j        |j        t          j                            ||                    z  |'z
  }(|&|(|||fS )a  
        Calculate the gradient of the QIF objective function.

        Parameters
        ----------
        params : array_like
            The model parameters at which the gradient is evaluated.

        Returns
        -------
        grad : array_like
            The gradient vector of the QIF objective function.
        gn_deriv : array_like
            The gradients of each estimating equation with
            respect to the parameter.
        r   r   Ng      g      ?r1   r)   )rP   rU   r#   dotrF   linkinversevarianceinverse_derivinverse_deriv2derivrJ   r   rZ   r3   r   constantrI   r   Identityidentityr@   sqrtranger   rO   r5   outerlinalgsolve))r   paramsrP   rU   lprmeanvaidlidl2vdmpdgngigi_derivgn_derivcn_derivcmatfastvarfastlinkixsdresidsresidrb   jjjccrs1crs2km1vxm2m3ungrpqifgcggrads)                                            r   	objectivezQIF.objective   s=   $ 
yfT6""{'',,[!!$'' k,,S11{..s33[!''--O%JqMEXa[[Xa[[8QF##8QF##37xA+&(*;;K^U^,
 
 . %	% %	%BBB"IR(ERZFQQQK#b$h-/EB1XX  
 O''B33va((2- fUWd332bd7va%"QQQW+!566AAAtGD')vegt'<'<BqD!!!$  
=W 
="1XX 	= 	=VDQQQKM$(HtBE{$:T$AC C& *!%2qqq!t!<r"vs{!JB!#bfQ6G6G1G!H!HB!#2:1F1F1K!L!LBB%)FB BqD!,,,R"<,,,,a1XX ' 'HR!!!Q$00q13w&"HB HBHR$$$DD4>""
d
DafRr2233hqkkq 	/ 	/AQKKK3t~..11KKK	hqk224A	a((AVBq"..CFF26(*bioodB&?&?@@@3FD$H,,r   c                 t   t          | j        t          j        t          j        f          rdS t          | d          r| j        }n| j        d         }t          j	        | j        |          }| j        j
                            |          }| j        |z
  }t          j        |dz            | j        |z
  z  }|S )z
        Estimate the dispersion/scale.

        The scale parameter for binomial and Poisson families is
        fixed at 1, otherwise it is estimated from the data.
        g      ?
ddof_scaler   r)   )rI   rF   r   BinomialPoissonhasattrr   rU   r#   r\   r]   r^   rP   sumrQ   )r   rk   r   rl   rm   r   scales          r   estimate_scalezQIF.estimate_scale  s     dkH$5x7G#HII 	24&& 	&JJ1JfTY''{'',,
T!uax  DI
$:;r   c                     t          |t                    r||         } t                      j        |g|R |||d|}|S )a  
        Create a QIF model instance from a formula and dataframe.

        Parameters
        ----------
        formula : str or generic Formula object
            The formula specifying the model
        groups : array_like or string
            Array of grouping labels.  If a string, this is the name
            of a variable in `data` that contains the grouping labels.
        data : array_like
            The data for the model.
        subset : array_like
            An array_like object of booleans, integers, or index
            values that indicate the subset of the data to used when
            fitting the model.

        Returns
        -------
        model : QIF model instance
        )datasubsetr;   )rI   strrL   from_formula)	clsformular;   r   r   argsrV   modelrD   s	           r   r   zQIF.from_formula2  si    2 fc"" 	"&\F$$3#'3 3!%f 3 3+13 3 r   d   ư>-C6?c           	         || j         j        d         | _        n|| _        |=t          | j        | j         | j                  }|                                }|j        }n|}t          |          D ]@}	| 	                    |          \  }
}}}	}t          j        t          j        ||z                      }| j        d                             |
           | j        d                             |           ||k     r ndt          j        |j        t          j                            ||                    z  }t          j                            ||          }t          j        t          j        ||z                      }| j        d                             |           ||k     r n||z  }Bt          j        |j        t          j                            ||                    }t          j                            |          }|                     |          }t+          | |||z  |          }| j        |_        t/          t0                    | _        t3          |          S )a,  
        Fit a GLM to correlated data using QIF.

        Parameters
        ----------
        maxiter : int
            Maximum number of iterations.
        start_params : array_like, optional
            Starting values
        tol : float
            Convergence threshold for difference of successive
            estimates.
        gtol : float
            Convergence threshold for gradient.
        ddof_scale : int, optional
            Degrees of freedom for the scale parameter

        Returns
        -------
        QIFResults object
        Nr   )rF   r   gradnormr)   stepnorm)rU   rZ   r   r   rP   rF   fitrk   rg   r   r#   rf   r   rH   rS   r\   r5   ri   rj   invr   
QIFResultsfit_historyr   rG   QIFResultsWrapper)r   maxiterstart_paramstolgtolr   r   resultrk   _r   r   rz   rx   gnormcjacstepsnormvcovr   rslts                        r   r   zQIF.fitT  s
   0 "ioa0DOO(DO
DIdkBBBEYY[[F]FF!Fw 	 	A+/>>&+A+A(CtQGBF4$;//00Ee$++C000j)00777t||rvhj")//$*I*IJJJD9??4..DGBF4$;//00Ej)00777s{{dNFFvhj")//$"A"ABBy}}T""##F++$ue<<,'-- &&&r   )NNr9   )N)r   Nr   r   N)r   r   r   r   r    rT   r   r   classmethodr   r   __classcell__rD   s   @r   r8   r8   e   s         6 48*0%! %! %! %! %! %!N	" 	" 	"f- f- f-P  .      [B BF@' @' @' @' @' @' @' @'r   r8   c                   p     e Zd ZdZ	 d
 fd	Zed             Zed             Zed             Zdd	Z	 xZ
S )r   z Results class for QIF RegressionFc                     t                                          ||||           | j                            |          \  | _        }}}}d S )N)normalized_cov_paramsr   )rL   r    r   r   r   )	r   r   rk   
cov_paramsr   use_tkwdsr   rD   s	           r   r    zQIFResults.__init__  sX     	6 	 	 	 	  $z33F;;!Q111r   c                     t          | j        j        t                    rd}t	          |          | j        j        j        d         }| j        d|z  z   S )zA
        An AIC-like statistic for models fit using QIF.
        z1AIC not available with QIFIndependence covariancer   r)   )rI   r   rJ   r   r2   rU   rZ   r   r   r6   dfs      r   aiczQIFResults.aic  sM    
 dj+_== 	"ECS//!Z_"1%x!B$r   c                     t          | j        j        t                    rd}t	          |          | j        j        j        d         }| j        t          j	        | j        j
                  |z  z   S )z@
        A BIC-like statistic for models fit using QIF.
        z1BIC not available with QIFIndependence covariancer   )rI   r   rJ   r   r2   rU   rZ   r   r#   logrQ   r   s      r   biczQIFResults.bic  s]    
 dj+_== 	"ECS//!Z_"1%x"&11"444r   c                     | j         j        j                            t	          j        | j         j        | j                            S )z;
        Returns the fitted values from the model.
        )r   rF   r]   r^   r#   r\   rU   rk   r   s    r   fittedvalueszQIFResults.fittedvalues  s9    
 z %--tz446 6 	6r   N皙?c           	      n   dddgfd| j         j        j        j        gfd| j         j        j        j        gfddg}d | j         j        D             }d	t          |          gfd
t          |          gfdt          |          gfdt          |          gfddt          j        |          z  gfdd| j        z  gfg}|| j         j        j        dz   dz   }|| j         j        }|| j         j        }ddlm}  |            }	|	                    | |||||           |	                    | |||d           |	S )ag  
        Summarize the QIF regression results

        Parameters
        ----------
        yname : str, optional
            Default is `y`
        xname : list[str], optional
            Names for the exogenous variables, default is `var_#` for ## in
            the number of regressors. Must match the number of parameters in
            the model
        title : str, optional
            Title for the top table. If not None, then this replaces
            the default title
        alpha : float
            significance level for the confidence intervals

        Returns
        -------
        smry : Summary instance
            this holds the summary tables and text, which can be
            printed or converted to various output formats.

        See Also
        --------
        statsmodels.iolib.summary.Summary : class to hold summary results
        )zDep. Variable:NzMethod:r8   zFamily:zCovariance structure:)zDate:N)zTime:Nc                 ,    g | ]}t          |          S r   )rO   )r>   ys     r   rA   z&QIFResults.summary.<locals>.<listcomp>  s    333c!ff333r   zNo. Observations:zNo. clusters:zMin. cluster size:zMax. cluster size:zMean cluster size:z%.1fzScale:z%.3fN zRegression Resultsr   )Summary)gleftgrightynamexnametitleF)r   r   alphar   )r   rF   rD   r   rJ   r@   r   rO   minmaxr#   rm   r   
exog_namesendog_namesstatsmodels.iolib.summaryr   add_table_2colsadd_table_params)
r   r   r   r   r   top_leftNY	top_rightr   smrys
             r   summaryzQIFResults.summary  s   : -(!2!<!E FG,j+5>?A## 43dj2333)CGG95%By1*SWWI6*SWWI6*Vbgbkk-A,BC$*!4 56	 =J(1C7$%E
 =J)E=J*E 	655555wyyT)#(#( 	 	* 	* 	* 	d%u$) 	 	8 	8 	8 r   )F)NNNr   )r   r   r   r   r    r   r   r   r   r   r   r   s   @r   r   r     s        **< < < < < <   ^ 5 5 ^5 6 6 ^6E E E E E E E Er   r   c                       e Zd ZdS )r   N)r   r   r   r   r   r   r   r     s        Dr   r   )"numpyr#   collectionsr   statsmodels.base.modelbaser   statsmodels.genmodr   +statsmodels.genmod.generalized_linear_modelr   statsmodels.genmod.familiesr   r   #statsmodels.regression.linear_model
regressionlinear_modellmstatsmodels.base.wrapperwrapperwrapstatsmodels.tools.decoratorsr   r
   r   r'   r-   Modelr8   LikelihoodModelResultsr   RegressionResultsWrapperr   populate_wrapperr   r   r   <module>r      s*       # # # # # # % % % % % % % % % ' ' ' ' ' ' ; ; ; ; ; ; - - - - - - 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 ' ' ' ' ' ' ' ' ' 7 7 7 7 7 7" " " " " " " "*    m   &    m   "       >o' o' o' o' o'$* o' o' o'd	n n n n n, n n nb	 	 	 	 	3 	 	 	  ' 4 4 4 4 4r   