
    M/Ph#                     	   d Z ddlmZ ddlZddlmZ d;dZd Z	d Z
dZd	Zd
 Zd Zd Zedk    rdZej                            dedf                              defdefdefdefg          Zej                            edf                              defdefg          ZddlZej        j                            eefd          Z ej        edfe          Z ej!        j"        D ]Z#ee#         e e#<   ej!        j"        D ]Z#ee#         e e#<   	  ede           \  Z$Z" ed e           \  Z$Z" ej(        d! e"D                       Z)e)*                    d          d"ej                            e          z  z   Z+ ej,        e+e)          -                                Z. e&e.j/                    e&e ee.          z              ej(        d#  ed$e"          D                       Z)e)*                    d          d"ej                            e          z  z   Z+ ej,        e+e)          -                                Z. e&e.j/                    e&e ee.          z              ej!        d%efd&efd'efd(efd)efd*efd+efd,efg          Z0 ej1        d-e0d.d/          Z2 e&d0d1 e2j!        j"        D                        e2j3                            e4          Z5e56                    d2 e7e2j!        j"                            8                    d          Z9e2e9 ddf         j:        Z; e&e;j<                    e&e;j!                    ed3e;          \  Z=Z> ej(        d4  ed5e>          D                       Z?e;d,         Z@ ej,        e@e?          -                                ZA e&eAj/                    e&e eeA          z             d6B                    e2j!        j"        dd2                   ZC ed7e;          \  ZDZE ej(        d8  ed5eE          D                       ZFe;d,         ZG ej,        eGeF          -                                ZH e&eHj/                    e&e eeH          z             eED ]wZI e&d9eIf            ej(        d:  eeIeE          D                       ZJe;d,         ZK ej,        eKeJ          -                                ZL e&e eeL          z             vdS dS )<a   convenience functions for ANOVA type analysis with OLS

Note: statistical results of ANOVA are not checked, OLS is
checked but not whether the reported results are the ones used
in ANOVA

includes form2design for creating dummy variables

TODO:
 * ...
 *

    )lmapNFc                    |                                  } t          j        |           }|r(| dddf         |k                        t                    S | dddf         |k                        t                    ddddf         S )z|convert array of categories to dummy variables
    by default drops dummy variable for last category
    uses ravel, 1d onlyN)ravelnpuniqueastypeint)x	returnallgroupss      l/var/www/html/test/jupyter/venv/lib/python3.11/site-packages/statsmodels/sandbox/regression/try_ols_anova.py
data2dummyr      s     	
		AYq\\F 9!!!T'
f$,,S111!!!T'
f$,,S11!!!CRC%88    c                    t          j        t          t          |                                                     }| |dddddf         k                        d          j                            t                    ddddf         S )zcreates product dummy variables from 2 columns of 2d array

    drops last dummy variable, but not from each category
    singular with simple dummy variable but not with constant

    quickly written, no safeguards

    Nr   )	r   r   r   tupletolistallTr	   r
   )r   r   s     r   data2proddummyr      ss     YtE188::..//FvaaaQQQh$$R((*11#66qqq"u==r   c                 \    |j         dk    r|dddf         }t          | d          }||z  S )zcreate dummy continuous variable

    Parameters
    ----------
    x1 : 1d array
        label or group array
    x2 : 1d array (float)
        continuous variable

    Notes
    -----
    useful for group specific slope coefficients in regression
       NT)r   )ndimr   )x1x2dummys      r   data2groupcontr   .   s<     
w!||$ZrT***E2:r   aW  
ANOVA statistics (model sum of squares excludes constant)
Source    DF  Sum Squares   Mean Square    F Value    Pr > F
Model     %(df_model)i        %(ess)f       %(mse_model)f   %(fvalue)f %(f_pvalue)f
Error     %(df_resid)i     %(ssr)f       %(mse_resid)f
CTotal    %(nobs)i    %(uncentered_tss)f     %(mse_total)f

R squared  %(rsquared)f
a]  
ANOVA statistics (model sum of squares includes constant)
Source    DF  Sum Squares   Mean Square    F Value    Pr > F
Model     %(df_model)i      %(ssmwithmean)f       %(mse_model)f   %(fvalue)f %(f_pvalue)f
Error     %(df_resid)i     %(ssr)f       %(mse_resid)f
CTotal    %(nobs)i    %(uncentered_tss)f     %(mse_total)f

R squared  %(rsquared)f
c                     i }|                     | j                   g d}|D ]}t          | |          ||<   | j        j        |d<   | j        | j        z
  |d<   |S )zjupdate regression results dictionary with ANOVA specific statistics

    not checked for completeness
    )df_modeldf_residessssruncentered_tss	mse_model	mse_resid	mse_totalfvaluef_pvaluersquarednobsssmwithmean)update__dict__getattrmodelr*   r#   r"   )resad
anova_attrkeys       r   	anovadictr4   [   sz    
 
BIIcl  J  $ $#s##3BvJ*SW4B}Ir   c                    i }g }|                                  D ]#}|dk    r8t          j        |j        d                   |d<   |                    d           Ad|vr!||         ||<   |                    |           f|dd         dk    rI|                     d          d         }t          ||                   ||<   |                    |           |dd         d	k    r|                     d          d                              d
          }t          t          j        ||d                  ||d                  f                   |d                    |          <   |                    d                    |                     r|dd         dk    r|                     d          d                              d
          }t          ||d                  ||d                            |d                    |          <   |                    d                    |                     t          d          ||fS )a  convert string formula to data dictionary

    ss : str
     * I : add constant
     * varname : for simple varnames data is used as is
     * F:varname : create dummy variables for factor varname
     * P:varname1*varname2 : create product dummy variables for
       varnames
     * G:varname1*varname2 : create product between factor and
       continuous variable
    data : dict or structured array
       data set, access of variables by name as in dictionaries

    Returns
    -------
    vars : dictionary
        dictionary of variables with converted dummy variables
    names : list
        list of names, product (P:) and grouped continuous
        variables (G:) have name by joining individual names
        sorted according to input

    Examples
    --------
    >>> xx, n = form2design('I a F:b P:c*d G:c*f', testdata)
    >>> xx.keys()
    ['a', 'b', 'const', 'cf', 'cd']
    >>> n
    ['const', 'a', 'b', 'cd', 'cf']

    Notes
    -----

    with sorted dict, separate name list would not be necessary
    Ir   const:N   zF:r   zP:* zG:zunknown expression in formula)splitr   onesshapeappendr   r   c_joinr   
ValueError)ssdatavarsnamesitemvs         r   form2designrI   l   s   H DE

 > >3;;GDJqM22DMLL!!!!__dDJLL"1"X

3"A a))DGLLOOOO"1"X

3"((--A-beD1JtAaDz4I.JKKDLL$$$$"1"X

3"((--A-d1Q4j$qt*EEDLL$$$$<===;r   c                 r    |dd         }|                                  D ]}|                    |           |S )zwdrop names from a list of strings,
    names to drop are in space delimited list
    does not change original list
    N)r<   remove)rC   linewlirG   s       r   dropnamerN      sB    
 qqqEE

  TLr   __main__i        )sizeabcdr9   efT)flattenr   zF:azP:a*bza F:b P:c*dzI a F:b P:c*dzI a F:b P:c*d G:a*e fc                 (    g | ]}t           |         S  xx.0nns     r   
<listcomp>ra      s    000BB000r   g{Gz?c                 (    g | ]}t           |         S r[   r\   r^   s     r   ra   ra      s    BBBBBBBBr   zae fbreedsexlitterpenpigagebageyzdftest3.data.)missingusemaskrl   c                 V    g | ]&}t           j        |                                         'S r[   )dtamasksum)r_   ks     r   ra   ra      s(    AAAAchqkoo''AAAr   r   zI F:sex agec                 (    g | ]}t           |         S r[   )xx_b1r^   s     r   ra   ra      s    GGG"E"IGGGr   r;    z'I F:breed F:sex F:litter F:pen age bagec                 (    g | ]}t           |         S r[   xx_b1ar^   s     r   ra   ra     s    JJJBVBZJJJr   z
Results droppingc                 (    g | ]}t           |         S r[   rw   r^   s     r   ra   ra     s    !R!R!R&*!R!R!Rr   )F)M__doc__statsmodels.compat.pythonr   numpyr   statsmodels.apiapismr   r   r   
anova_str0	anova_strr4   rI   rN   __name__r*   randomrandintviewr
   testdataintnormalfloattestdatacontnumpy.lib.recfunctionslibrecfunctions	zip_descrdt2emptytestdatadtyperF   namer]   nprintr@   column_stackXrq   rj   OLSfitrest1paramsdt_b
genfromtxtro   rp   boolmreshapelenanydroprowsrD   
dta_use_b1r>   rt   names_b1X_b1y_b1rest_b1rA   allexogrx   	names_b1aX_b1ay_b1arest_b1adropnX_b1a_y_b1a_	rest_b1a_r[   r   r   <module>r      s]    + * * * * *          	9 	9 	9 	9> > >  0
	  "; ; ;z   z
 D)##AT!H#55::SIs3iQTUXPY[^_bZc;deeK9##4(#33883u+E{9STTL!!!!
)
 
*
*K+Ft
*
T
TCrxa#&&H!' + +$T*"( , ,%d+
8 OX66IB3X>>IB 	00%00011A	a4	((t(5555ABF1QKKOOE	E%,	E)ii&&
&''' 	BB((65*A*ABBBCCA	a4	((t(5555ABF1QKKOOE	E%,	E)ii&&
&''' 28gs^eS\Hc?3<%u~E?S%L2 3 3D "-S$
G
G
GC	E)AAAAABBBdAyyCC	001155a88H hYqqq[!&J	E*
	E*
 "k-<<OE82?GGX0F0FGGGHHDc?DbfT4  $$&&G	E'.	E)ii((
())) hhsyss+,,G $$MzZZFIBOJJ((2y2I2IJJJKKEsOErveU##''))H	E(/	E)ii))
)*** 0 0#U+,,, !R!Rxxy7Q7Q!R!R!RSSCBF66**..00	i))I...////U H0 0r   