
    M/PhY                        d dl mZ d dlZd dlZd dlmZmZ d dlZd dl	m
Z
 d dlmZmZmZ d dlmZ d dlmZ d Zd	 Zd
 Zd Zd Zd Zd Zd Z G d d          Z G d d          Zedk    rld dlZd dlmZ  ej         ddg d          Z! ede!          "                                Z# ede!          "                                Z$ ee#d          Z%dS dS )    )lrangeN)	DataFrameIndex)stats)_has_intercept_intercept_idx_remove_intercept_patsy)summary2)OLSc                     ||                                  S |dk    r| j        S |dk    r| j        S |dk    r| j        S |dk    r| j        S t          d|z            )Nhc0hc1hc2hc3z robust options %s not understood)
cov_paramscov_HC0cov_HC1cov_HC2cov_HC3
ValueError)modelrobusts     W/var/www/html/test/jupyter/venv/lib/python3.11/site-packages/statsmodels/stats/anova.py_get_covariancer      sq    ~!!!	5}	5}	5}	5};fDEEE    c                 &   |                     dd          }|                     dd          }|                     dd          }|                     dd          }|r|                                }| j        j        }| j        j        }|j        d         }| j        j        }	| j        j        j        }
| j        j	        }t          |
j                  t          |
          z
  dz   }d	|z  }d
dd||g}t          t          j        |df          |          }|dv rt!          | ||||
|||||
  
        S |dv rt#          | |
||||          S |dv rt%          | |
||||          S |dv rt'          d          t)          dt+          |          z            )a9  
    Anova table for one fitted linear model.

    Parameters
    ----------
    model : fitted linear model results instance
        A fitted linear model
    typ : int or str {1,2,3} or {"I","II","III"}
        Type of sum of squares to use.

    **kwargs**

    scale : float
        Estimate of variance, If None, will be estimated from the largest
    model. Default is None.
        test : str {"F", "Chisq", "Cp"} or None
        Test statistics to provide. Default is "F".

    Notes
    -----
    Use of this function is discouraged. Use anova_lm instead.
    testFscaleNtyp   r   r   zPR(>%s)dfsum_sqmean_sq   columnsr!   I)   II)   III)   IVzType IV not yet implementedzType %s not understood)getlowerr   endogexogshapeendog_namesdatadesign_info
exog_nameslentermsr   r   npzerosanova1_lm_singleanova2_lm_singleanova3_lm_singleNotImplementedErrorr   str)r   kwargsr   r   r    r   r2   r3   nobsresponse_namer7   r8   n_rowspr_testnamestables                   r   anova_singlerI   #   s   . ::fc""DJJw%%E
**UA

CZZ$''F  KE;D:a=DK+M+".K'J+#$$~k'B'BBQFF$G8Yg6Ebh{++U;;;E
hudD+u &gv? ? 	?				{FD' &( ( 	(	
		{FD' &( ( 	(				!"?@@@1CHH<===r   c
                 J   t          | dd          }
|
<t          j                            |          \  }}t          j        |j        |          }
t          j        t          j                  t          j	                  f          }fdj
        D             }t          |          D ]\  }}d|||f<   t          j        ||
dz            }t                    }||          }t          j        j
                  }||          }|                                }t          |dgz             |_        t          j        ||                              d          |f         |j        |ddgf<   | j        | j        f|j        dddgf<   |d	k    r}|d         |d         z  | j        | j        z  z  ||<   t,          j                            |d	         |d         | j                  ||<   t          j        t          j        f|j        d||gf<   |d         |d         z  |d
<   |S )a  
    Anova table for one fitted linear model.

    Parameters
    ----------
    model : fitted linear model results instance
        A fitted linear model

    **kwargs**

    scale : float
        Estimate of variance, If None, will be estimated from the largest
    model. Default is None.
        test : str {"F", "Chisq", "Cp"} or None
        Test statistics to provide. Default is "F".

    Notes
    -----
    Use of this function is discouraged. Use anova_lm instead.
    effectsNc                 :    g | ]}                     |          S  )slice).0namer7   s     r   
<listcomp>z$anova1_lm_single.<locals>.<listcomp>|   s'    III$k%%IIIr   r!   r*   Residualr"   r#   r   r$   )getattrr;   linalgqrdotTr<   r9   r:   column_names
term_names	enumerater   arraytolistr   indexc_sumlocssrdf_residr   fsfnan)r   r2   r3   rC   r7   rH   rE   r   rF   r   rK   qrarrslicesislice_r#   idxrY   r]   s       `                r   r=   r=   _   s   . eY--Gill4  !&e$$
(C)**C0H,I,IJ
K
KCIIII+2HIIIFf%%  &AvIVC!$$F

%
%CSD\F+011JSD!JE,--EK)+sC4y}}Q/?/?/G)HEIedH%%&-2Y-FEIj8D/)*s{{h%+5	EN24dE#Jd$)N4 4g13	*tWo-.Xt4E)Lr   c                    |j         dd         }t          |          }dd||g}t          t          j        |df          |          }t          | d          }	t          | |          }
g }g }t          |          D ]\  }}|                    |          }t          |j	        |j
                  }g }t          |j                  }|D ]}t          |j                  }|                    |          ru||k    so|                    |          }|                    t          |j	        |j
                             |                    t          |j	        |j
                             t          j        | j        j        j        d                   |         }t          j        | j        j        j        d                   |         }|j        rt          j        t          j        ||
          |j                  }ddlm} |                    |          \  }}|j        d         |j        d         z
  }t          j        |dd| df         j        |          }n|}|j        d         }|d	k    rQ|                     ||

          }|j        x|j        |j        |         |f<   }|j        |j        |j        |         |f<   ||j        |j        |         df<   |                    |j	                   |                    |                                           tA          |dgz             |_        |j!        t          j"        || j        j        j        d         dz   gz                      }||         |d         z  | j#        z  | j$        z  }||d<   | j#        | j$        t          j%        t          j%        f|j        ddd||gf<   |S )a  
    Anova type II table for one fitted linear model.

    Parameters
    ----------
    model : fitted linear model results instance
        A fitted linear model

    **kwargs**

    scale : float
        Estimate of variance, If None, will be estimated from the largest
    model. Default is None.
        test : str {"F", "Chisq", "Cp"} or None
        Test statistics to provide. Default is "F".

    Notes
    -----
    Use of this function is discouraged. Use anova_lm instead.

    Type II
    Sum of Squares compares marginal contribution of terms. Thus, it is
    not particularly useful for models with significant interaction terms.
    Nr#   r"   r.   r&   r!   r   )rT   r   cov_prR   )&r:   r	   r   r;   r<   r   rZ   rN   r   startstopsetfactorsissubsetextendeyer   r3   r4   sizerV   rW   scipyrT   rU   f_testfvaluer`   r]   pvalueappendrP   r   ilocargsortra   rb   re   )r   r7   rE   r   rF   r   
terms_inforG   rH   cov
robust_cov	col_orderr]   rj   termcolsL1L2term_sett	other_setcolLVLrT   
orth_compl_rg   L12rc   
test_valuera   s                                  r   r>   r>      s   2 "111%J(44JtT7+Ebh{++u===E
%
&
&C //JIEZ(( '" '"4   &&DJ	**t|$$ 	7 	7AAII  ++ 7H	4I4I!''**		&CH55666		&CH55666VEK$*1-..r2VEK$*1-..r27 
	&:..rt44C$$$$$$!99S>>LJqbhqk)A &AAAqbccE*,b11CCCA3;;S
33A;<8CEIek!nd*+j12EIek!ng-. +,	%+a.$&'$$$TYY[[!!!!,--EKJrz)u{/?/Ea/H/J.K"KLLME
+d
#ei
/
>CE(O=BY<AN<>FBF=LEIj8D$889 Lr   c                    |t          |          z  }|j        }dd||g}t          t          j        |df          |          }t          | |          }	g }
g }t          |          D ]\  }}|                    |          }t          j        | j	        j
        j        d                   |         }|}|j        d         }|dk    rQ|                     ||	          }|j        x|j        |j        |         |f<   }|j        |j        |j        |         |f<   ||j        |j        |         df<   |                    |                                           t'          |d	gz             |_        ||         |d         z  | j        z  | j        z  }||d<   | j        | j        t          j        t          j        f|j        d	dd||gf<   |S )
Nr#   r"   r.   r&   r!   r   r   rn   rR   )r   r:   r   r;   r<   r   rZ   rN   rv   r   r3   r4   ry   rz   r`   r]   r{   r|   rP   r   ra   rb   re   )r   r7   rE   r   rF   r   r   rG   rH   r   r   r]   rj   r   r   r   r   rg   rc   r   ra   s                        r   r?   r?      s   
n[)))F"JtT7+Ebh{++u===E
%
(
(CIEZ(( " "4  &&VEK$*1-..t4HQK3;;S,,A;<8CEIek!nd*+j12EIek!ng-. +,	%+a.$&'TYY[[!!!!,--EK +d
#ei
/
>CE(O=BY<AN<>FBF=LEIj8D$889 Lr   c                     |                     dd          }t          |           dk    r| d         }t          |fi |S |dvrt          dt	          |          z            |                     dd          }|                     dd	          }t          |           }d
|z  }dddd||g}t          t          j        |df          |          }	|s| d         j        }d | D             |	d<   d | D             |	d<   t          j	        |	d         j
                   |	j        |	j        dd	         df<   |	d         	                                 |	d<   |dk    r{|	d         |	d         z  |z  |	d<   t          j                            |	d         |	d         |	d                   |	|<   t          j        |	j        |	d                                         |f<   |	S )a	  
    Anova table for one or more fitted linear models.

    Parameters
    ----------
    args : fitted linear model results instance
        One or more fitted linear models
    scale : float
        Estimate of variance, If None, will be estimated from the largest
        model. Default is None.
    test : str {"F", "Chisq", "Cp"} or None
        Test statistics to provide. Default is "F".
    typ : str or int {"I","II","III"} or {1,2,3}
        The type of Anova test to perform. See notes.
    robust : {None, "hc0", "hc1", "hc2", "hc3"}
        Use heteroscedasticity-corrected coefficient covariance matrix.
        If robust covariance is desired, it is recommended to use `hc3`.

    Returns
    -------
    anova : DataFrame
        When args is a single model, return is DataFrame with columns:

        sum_sq : float64
            Sum of squares for model terms.
        df : float64
            Degrees of freedom for model terms.
        F : float64
            F statistic value for significance of adding model terms.
        PR(>F) : float64
            P-value for significance of adding model terms.

        When args is multiple models, return is DataFrame with columns:

        df_resid : float64
            Degrees of freedom of residuals in models.
        ssr : float64
            Sum of squares of residuals in models.
        df_diff : float64
            Degrees of freedom difference from previous model in args
        ss_dff : float64
            Difference in ssr from previous model in args
        F : float64
            F statistic comparing to previous model in args
        PR(>F): float64
            P-value for significance comparing to previous model in args

    Notes
    -----
    Model statistics are given in the order of args. Models must have been fit
    using the formula api.

    See Also
    --------
    model_results.compare_f_test, model_results.compare_lm_test

    Examples
    --------
    >>> import statsmodels.api as sm
    >>> from statsmodels.formula.api import ols
    >>> moore = sm.datasets.get_rdataset("Moore", "carData", cache=True) # load
    >>> data = moore.data
    >>> data = data.rename(columns={"partner.status" :
    ...                             "partner_status"}) # make name pythonic
    >>> moore_lm = ols('conformity ~ C(fcategory, Sum)*C(partner_status, Sum)',
    ...                 data=data).fit()
    >>> table = sm.stats.anova_lm(moore_lm, typ=2) # Type 2 Anova DataFrame
    >>> print(table)
    r    r!   r   r(   z6Multiple models only supported for type I. Got type %sr   r   r   NzPr(>%s)rb   ra   df_diffss_diff   r&   c                     g | ]	}|j         
S rM   )ra   rO   mdls     r   rQ   zanova_lm.<locals>.<listcomp>q  s    ,,,CG,,,r   c                     g | ]	}|j         
S rM   )rb   r   s     r   rQ   zanova_lm.<locals>.<listcomp>r  s    666#666r   )r0   r9   rI   r   rA   r   r;   r<   r   diffvaluesr`   r]   r   rc   rd   re   isnull)
argsrB   r    r   r   r   n_modelsrF   rG   rH   s
             r   anova_lmr     s   L **UA

C 4yyA~~QE,,V,,,
( '),S2 3 3 	3 ::fc""DJJw%%E4yyH$G	9dGDEbh!}--u===E R,,t,,,E%L66666E*-/WU:5F5M-N-N,NEIek!""oy()e))+++E)s{{9%i(885@c
E#Ji0@$)*$57 7g 35&	%*##%%w./Lr   c                 Z    t          j        dg|z            }|D ]}| |         }d||<   |S )NTF)r;   r[   )ri   slices_to_excludenindr   ss         r   
_not_slicer     s?    
(D6!8

C!  4LAJr   c                 (   t          |||j        d                   }||         }t          j        | |dd|f                             |                    }|j                            |          }t          |           t          |          z
  }||fS )ah  
    Residual sum of squares of OLS model excluding factors in `keys`
    Assumes x matrix is orthogonal

    Parameters
    ----------
    y : array_like
        dependent variable
    x : array_like
        independent variables
    term_slices : a dict of slices
        term_slices[key] is a boolean array specifies the parameters
        associated with the factor `key`
    params : ndarray
        OLS solution of y = x * params
    keys : keys for term_slices
        factors to be excluded

    Returns
    -------
    rss : float
        residual sum of squares
    df : int
        degrees of freedom
    r!   N)r   r4   r;   subtractrV   rW   r9   )	yxterm_slicesparamskeysr   params1ra   rb   s	            r   _ssr_reduced_modelr     s~    4 [$

3
3CSkG
+a111c6w//
0
0C
%))C..C1vvG$H=r   c                   0    e Zd ZdZ	 	 ddZd Zd Zd ZdS )AnovaRMa  
    Repeated measures Anova using least squares regression

    The full model regression residual sum of squares is
    used to compare with the reduced model for calculating the
    within-subject effect sum of squares [1].

    Currently, only fully balanced within-subject designs are supported.
    Calculation of between-subject effects and corrections for violation of
    sphericity are not yet implemented.

    Parameters
    ----------
    data : DataFrame
    depvar : str
        The dependent variable in `data`
    subject : str
        Specify the subject id
    within : list[str]
        The within-subject factors
    between : list[str]
        The between-subject factors, this is not yet implemented
    aggregate_func : {None, 'mean', callable}
        If the data set contains more than a single observation per subject
        and cell of the specified model, this function will be used to
        aggregate the data before running the Anova. `None` (the default) will
        not perform any aggregation; 'mean' is s shortcut to `numpy.mean`.
        An exception will be raised if aggregation is required, but no
        aggregation function was specified.

    Returns
    -------
    results : AnovaResults instance

    Raises
    ------
    ValueError
        If the data need to be aggregated, but `aggregate_func` was not
        specified.

    Notes
    -----
    This implementation currently only supports fully balanced designs. If the
    data contain more than one observation per subject and cell of the design,
    these observations need to be aggregated into a single observation
    before the Anova is calculated, either manually or by passing an aggregation
    function via the `aggregate_func` keyword argument.
    Note that if the input data set was not balanced before performing the
    aggregation, the implied heteroscedasticity of the data is ignored.

    References
    ----------
    .. [*] Rutherford, Andrew. Anova and ANCOVA: a GLM approach. John Wiley & Sons, 2011.
    Nc                    || _         || _        || _        d|v rt          d          || _        |t          d          || _        |dk    rt          j        j	        | _
        n|| _
        |                    |                    |g|z                       s-| j
        |                                  nd}t          |          |                                  d S )NCzSFactor name cannot be 'C'! This is in conflict with patsy's contrast function name.z)Between subject effect not yet supported!mean)subsetzThe data set contains more than one observation per subject and cell. Either aggregate the data manually, or pass the `aggregate_func` parameter.)r6   depvarwithinr   betweenr@   subjectpdSeriesr   aggregate_funcequalsdrop_duplicates
_aggregate_check_data_balanced)selfr6   r   r   r   r   r   msgs           r   __init__zAnovaRM.__init__  s    	&== D E E E% '7 8 8 8V##"$).D"0D{{4//y67I/JJKK 	&".!!!!A !oo%!!#####r   c                     | j                             | j        g| j        z   d          | j                                     | j                  | _         d S )NF)as_index)r6   groupbyr   r   r   aggr   r   s    r   r   zAnovaRM._aggregate  sR    Ygt|nt{:',  . ..2k; c$-.. 				r   c                 h   d}| j         D ]1}|t          | j        |                                                   z  }2i }t	          | j        j        d                   D ]`}g }| j         D ]-}|                    | j        |         j        |                    .t          |          }||v r||         dz   ||<   [d||<   ad}t          |          |k    rt          |          ||         }|D ]}|||         k    rt          |          | j        j        d         ||z  k    rt          d          dS )zraise if data is not balanced

        This raises a ValueError if the data is not balanced, and
        returns None if it is balance

        Return might change
        r!   r   zData is unbalanced.z9There are more than 1 element in a cell! Missing factors?N)
r   r9   r6   uniqueranger4   r|   r}   tupler   )	r   factor_levelswi
cell_countr]   keyr   error_messagecounts	            r   r   zAnovaRM._check_data_balanced  si    + 	9 	9BS2!5!5!7!7888MM
49?1-.. 	$ 	$EC{ 7 7

49S>.u56666**Cj  ",S/A"5
3"#
3-z??m++]+++3 	0 	0C
3'' /// (9?1 555 ) * * * 65r   c           	         | j         | j                 j        }d | j        D             }d| j        z  }||gz   }t          j        d                    |          | j                   }|j        j	        }|D ]G}t          j        dg|j        d         z            }d|||         <   t          j        |          ||<   Hd                    |          g}	t          ||	|j        d                   }|d	d	|f         }t          ||          }
|
                                }|
j        |j        d         k     rt#          d
          |	D ]}|                    |           |D ]}||         |         ||<   |j        }|j        }|j        }g d}t-          j        t          j        d          |          }|D ]}| j        |vr|dk    rt3          |||||g          \  }}||z
  }||z
  |z  }|d                    |d	d                   k    s
|dz   |z   |vr||z  }|}n*t3          |||||dz   |z   g          \  }}||z
  }||z
  |z  }||z  }t4          j                            |||          }|                    dd                              dd          }||j        |df<   ||j        |df<   ||j        |df<   ||j        |df<   t?          |          S )zvestimate the model and compute the Anova table

        Returns
        -------
        AnovaResults instance
        c                     g | ]}d |z  S )
C(%s, Sum)rM   )rO   rj   s     r   rQ   zAnovaRM.fit.<locals>.<listcomp>0  s    888q,"888r   r   *r6   Fr!   T:Nz$Independent variables are collinear.)F ValueNum DFDen DFPr > F)r   r.   r&   	Interceptr   zC( z, Sum)r   r   r   r   ) r6   r   r   r   r   patsydmatrixjoinr7   term_name_slicesr;   r[   r4   r   r   fitrankr   popr   rb   ra   r   r   r<   r   r   rc   rd   replacer`   AnovaResults)r   r   r   r   rs   r   r   r   r   term_excluder   resultsrj   r   rb   ra   r'   anova_tablessr1	df_resid1df1msmmsedf2r   pr   s                              r   r   zAnovaRM.fit&  s:    Idk") 98DK888-G9$M#((7++$)<<<m4 	- 	-C(E7171:-..C$(CC !!x}}K))*lAGAJ??aaafI Aq		))++:
""CDDD 	 	AOOA 	5 	5C*3/4K#k;;;l28F#3#3WEEE 	4 	4C|3&&3++=+="4q+vu#6 #6i(*czS(388GCRCL1111sW,K??.C"CC&81k6sW,-'/ '/OD) $h.C#:,C#IGJJq#s++{{4,,44XrBB34i025h/25h/23h/K(((r   )NNN)__name__
__module____qualname____doc__r   r   r   r   rM   r   r   r   r     sp        5 5n DH $$ $ $ $<0 0 0* * *B@) @) @) @) @)r   r   c                   $    e Zd ZdZd Zd Zd ZdS )r   zX
    Anova results class

    Attributes
    ----------
    anova_table : DataFrame
    c                     || _         d S N)r   )r   r   s     r   r   zAnovaResults.__init__q  s    &r   c                 N    |                                                                  S r   )summary__str__r   s    r   r   zAnovaResults.__str__t  s    ||~~%%'''r   c                     t          j                    }|                    d           |                    | j                   |S )zlcreate summary results

        Returns
        -------
        summary : summary2.Summary instance
        Anova)r
   Summary	add_titleadd_dfr   )r   summs     r   r   zAnovaResults.summaryw  s>     !!wD$%%%r   N)r   r   r   r   r   r   r   rM   r   r   r   r   i  sK         ' ' '( ( (    r   r   __main__)olsz	moore.csvr!   )partner_status
conformity	fcategoryfscore)skiprowsrG   z5conformity ~ C(fcategory, Sum)*C(partner_status, Sum)r   z#conformity ~ C(partner_status, Sum)r*   )r    )&statsmodels.compat.pythonr   numpyr;   pandasr   r   r   r   rx   r    statsmodels.formula.formulatoolsr   r   r	   statsmodels.iolibr
   #statsmodels.regression.linear_modelr   r   rI   r=   r>   r?   r   r   r   r   r   r   statsmodels.formula.apir  read_csvmoorer   moore_lmmooreBrH   rM   r   r   <module>r     s^   , , , , , ,         # # # # # # # #                
 ' & & & & & 3 3 3 3 3 3F F F"9> 9> 9>x4 4 4nV V Vp% % %Ng g gT    D}) }) }) }) }) }) }) })@       8 zMMM++++++
 FOK!#9 #9 #9: : :E sJ      #  S6UCCCGGIIF HX1%%%EEE/ r   