
    M/PhQ                        d Z ddlZddlmZmZmZmZmZm	Z	 ddl
mZ ddlZddlmZ ddlmZ ddlmZ ddlmZ d	Zd
ZddZ	 ddZd Z ee          d             Z G d de          Z G d d          Z G d d          ZdS )z*General linear model

author: Yichuan Liu
    N)eigvalsinvsolvematrix_rankpinvsvd)stats)
DesignInfo)Substitution)Model)summary2zrestructuredtext ena5  hypotheses : list[tuple]
    Hypothesis `L*B*M = C` to be tested where B is the parameters in
    regression Y = X*B. Each element is a tuple of length 2, 3, or 4:

      * (name, contrast_L)
      * (name, contrast_L, transform_M)
      * (name, contrast_L, transform_M, constant_C)

    containing a string `name`, the contrast matrix L, the transform
    matrix M (for transforming dependent variables), and right-hand side
    constant matrix constant_C, respectively.

    contrast_L : 2D array or an array of strings
        Left-hand side contrast matrix for hypotheses testing.
        If 2D array, each row is an hypotheses and each column is an
        independent variable. At least 1 row
        (1 by k_exog, the number of independent variables) is required.
        If an array of strings, it will be passed to
        patsy.DesignInfo().linear_constraint.

    transform_M : 2D array or an array of strings or None, optional
        Left hand side transform matrix.
        If `None` or left out, it is set to a k_endog by k_endog
        identity matrix (i.e. do not transform y matrix).
        If an array of strings, it will be passed to
        patsy.DesignInfo().linear_constraint.

    constant_C : 2D array or None, optional
        Right-hand side constant matrix.
        if `None` or left out it is set to a matrix of zeros
        Must has the same number of rows as contrast_L and the same
        number of columns as transform_M

    If `hypotheses` is None: 1) the effect of each independent variable
    on the dependent variables will be tested. Or 2) if model is created
    using a formula,  `hypotheses` will be created according to
    `design_info`. 1) and 2) is equivalent if no additional variables
    are created by the formula (e.g. dummy variables for categorical
    variables and interaction terms)
r   :0yE>c                 0   | }|}|j         \  }}|j         \  }}	||k    rt          d||fz            ||	z
  }
|dk    rt          |          }|                    |          }|                    |j                  }t          ||          |	k     rt          d          |                    |          }t          j        |j                            |          |j                            |                    }||
||fS |dk    rt          |d          \  }}}||k    	                                t          |          k     rt          d          d|z  }|j                            t          j        |                                        |j                                      |          }|j                            t          j        t          j        |d                                                  |          }t          j        |                              |                              |          }t          j        |j                            |          |j                            |                    }||
||fS t          d	|z            )
a$  
    Solve multivariate linear model y = x * params
    where y is dependent variables, x is independent variables

    Parameters
    ----------
    endog : array_like
        each column is a dependent variable
    exog : array_like
        each column is a independent variable
    method : str
        'svd' - Singular value decomposition
        'pinv' - Moore-Penrose pseudoinverse
    tolerance : float, a small positive number
        Tolerance for eigenvalue. Values smaller than tolerance is considered
        zero.
    Returns
    -------
    a tuple of matrices or values necessary for hypotheses testing

    .. [*] https://support.sas.com/documentation/cdl/en/statug/63033/HTML/default/viewer.htm#statug_introreg_sect012.htm
    Notes
    -----
    Status: experimental and incomplete
    z8x(n=%d) and y(n=%d) should have the same number of rows!r   )tolzCovariance of x singular!r   r   g      ?   z%s is not a supported method!)shape
ValueErrorr   dotTr   npsubtractr   sumlendiagpower)endogexogmethod	toleranceyxnobsk_endognobs1k_exogdf_residpinv_xparamsinv_covtsscprusvinvss                       i/var/www/html/test/jupyter/venv/lib/python3.11/site-packages/statsmodels/multivariate/multivariate_ols.py_multivariate_ols_fitr1   ;   s=   4 	AAGMD'7ME6u}} !$)4=1 2 2 	2 f}HaA **VX&&w9---668999 EE&MMACGGAJJ

33'511	5a))1a	M  3q66))8999Av''++AC0044Q77#''"'"(4"3"3445599!<<GAJJNN1!!&))ACGGAJJ

33'51186ABBB    c                    |}|}|}t          j        ||g          }| |k    }	|	                                }
| |	         }t          j        d |D                       }t          j        ||z
            dz
  dz  }||z
  dz
  dz  }g d}g d}t          j        ||          }d } |t          j        d|z
                      |j        d<    ||                                          |j        d	<    ||                                          |j        d
<    ||	                                          |j        d<   |||z
  dz   dz  z
  }||z  dz
  dz  }||z  }||z  ||z  z   dz
  dk    r0t          j
        ||z  |z  |z  dz
  ||z  ||z  z   dz
  z            }nd}||z  d|z  z
  }|j        d         }t          j        |d|z            }d|z
  |z  |z  |z  }||j        d<   ||j        d<   ||j        d<   t          j                            |||          }||j        d<   |j        d	         }|d|z  |z   dz   z  }|d|z  |z   dz   z  }||z  |z  ||z
  z  }||j        d<   ||j        d<   ||j        d<   t          j                            |||          }||j        d<   |j        d
         }|dk    rP|d|z  z   |d|z  z   z  dz  d|z  dz   z  |dz
  z  }||z  }d||z  dz   |dz
  z  z   }|dz
  dz  |z  }||z  |z  |z  }n$|d|z  |z   dz   z  }|||z  dz   z  }||z  |z  |z  }||j        d<   ||j        d<   ||j        d<   t          j                            |||          }||j        d<   |j        d         }t          j	        ||g          }|}||z
  |z   }||z  |z  }||j        d<   ||j        d<   ||j        d<   t          j                            |||          }||j        d<   |S )aV  
    For multivariate linear model Y = X * B
    Testing hypotheses
        L*B*M = 0
    where L is contrast matrix, B is the parameters of the
    multivariate linear model and M is dependent variable transform matrix.
        T = L*inv(X'X)*L'
        H = M'B'L'*inv(T)*LBM
        E =  M'(Y'Y - B'X'XB)M

    Parameters
    ----------
    eigenvals : ndarray
        The eigenvalues of inv(E + H)*H
    r_err_sscp : int
        Rank of E + H
    r_contrast : int
        Rank of T matrix
    df_resid : int
        Residual degree of freedom (n_samples minus n_variables of X)
    tolerance : float
        smaller than which eigenvalue is considered 0

    Returns
    -------
    A DataFrame

    References
    ----------
    .. [*] https://support.sas.com/documentation/cdl/en/statug/63033/HTML/default/viewer.htm#statug_introreg_sect012.htm
    c                     g | ]
}|d |z
  z  S )    ).0is     r0   
<listcomp>z&multivariate_stats.<locals>.<listcomp>   s     111aa1q5k111r2   r5   r   )ValueNum DFDen DFF ValuePr > F)Wilks' lambdaPillai's traceHotelling-Lawley traceRoy's greatest root)columnsindexc                 8    t          j        | g          d         S )Nr   )r   real)r!   s    r0   fnzmultivariate_stats.<locals>.fn   s    ws||Ar2   )r?   r:   )r@   r:   )rA   r:   )rB   r:         r   )r?   r;   )r?   r<   )r?   r=   )r?   r>   )r@   r;   )r@   r<   )r@   r=   )r@   r>   )rA   r;   )rA   r<   )rA   r=   )rA   r>   )rB   r;   )rB   r<   )rB   r=   )rB   r>   )r   minr   arrayabspd	DataFrameprodlocmaxsqrtr   r	   fsf) 	eigenvals
r_err_sscp
r_contrastr&   r   r.   pqr-   indn_eeigv2eigv1mncolsrD   resultsrG   rr,   df1r*   df2lmdFpvalVUbcsigmas                                    r0   multivariate_statsrm   }   s   D 	AAA
1vA
i
C
''))CcNEH11511122E	A	aA	
QaA===D> > >El4!&( ( (G   -/Brwq5y/A/A,B,BGK()-/R		__GK)*57R		__GK1224"UYY[[//GK./	QUQYMA	
1qAA
a%CsQqSy1}qGQqSU1Wq[QqS1Q3Y]344
A#!)C
+.
/C
(3A

C	
SC##A-0GK)*-0GK)*./GK*+7::ac""D-1GK)*-.A
qsQw{
C
qsQw{
Cc	AQA.1GK*+.1GK*+/0GK+,7::ac""D.2GK*+56A1uu1WQqS!A%1q1QU;!e1Q37q1u%%1WMA#IMA1Q37Q;1Q37m#IMA69GK2369GK2378GK347::ac""D6:GK23K67E
1vA
C
a%!)Cc	EA36GK/036GK/045GK017::ac""D37GK/0Nr2   c                 2    fd}t          | |||          S )Nc                    \  }}}}|                      |                               |          |z
  }|                      |                               | j                  }t          |          }	|j                             t          |                                         |          }
|j                             |                               |          }||
|	|fS N)r   r   r   r   )LMCr(   r&   r)   r+   t1t2rY   HEfit_resultss               r0   rG   z"_multivariate_ols_test.<locals>.fn   s     ,7('5UU6]]q!!A%UU7^^$$OODHHSWW!!"%% CGGENNq!!!Q  r2   )_multivariate_test)
hypothesesrx   
exog_namesendog_namesrG   s    `   r0   _multivariate_ols_testr}      s2    ! ! ! ! ! j*k2FFFr2   hypotheses_docc           	         t          |          }t          |          }i }| D ]_}t          |          dk    r
|\  }}	d}
d}nVt          |          dk    r	|\  }}	}
d}n:t          |          dk    r|\  }}	}
}nt          dt          |          z            t          d |	D                       r(t          |                              |	          j        }	nqt          |	t          j                  rt          |	j	                  dk    rt          d          |	j	        d         |k    rt          d	|	j	        d         |fz            |
t          j
        |          }
nt          d
 |
D                       r-t          |                              |
          j        j        }
ns|
qt          |
t          j                  rt          |
j	                  dk    rt          d          |
j	        d         |k    rt          d|
j	        d         |fz            |-t          j        |	j	        d         |
j	        d         g          }n)t          |t          j                  st          d          |j	        d         |	j	        d         k    r*t          d|	j	        d         |j	        d         fz            |j	        d         |
j	        d         k    r*t          d|
j	        d         |j	        d         fz             ||	|
|          \  }}}}t          j        ||          }t          |          }t          j        t!          t#          ||                              }t%          ||||          }||	|
|||d||<   a|S )ac  
    Multivariate linear model hypotheses testing

    For y = x * params, where y are the dependent variables and x are the
    independent variables, testing L * params * M = 0 where L is the contrast
    matrix for hypotheses testing and M is the transformation matrix for
    transforming the dependent variables in y.

    Algorithm:
        T = L*inv(X'X)*L'
        H = M'B'L'*inv(T)*LBM
        E =  M'(Y'Y - B'X'XB)M
    where H and E correspond to the numerator and denominator of a univariate
    F-test. Then find the eigenvalues of inv(H + E)*H from which the
    multivariate test statistics are calculated.

    .. [*] https://support.sas.com/documentation/cdl/en/statug/63033/HTML
           /default/viewer.htm#statug_introreg_sect012.htm

    Parameters
    ----------
    %(hypotheses_doc)s
    k_xvar : int
        The number of independent variables
    k_yvar : int
        The number of dependent variables
    fn : function
        a function fn(contrast_L, transform_M) that returns E, H, q, df_resid
        where q is the rank of T matrix

    Returns
    -------
    results : MANOVAResults
    r   N   rH   zBhypotheses must be a tuple of length 2, 3 or 4. len(hypotheses)=%dc              3   @   K   | ]}t          |t                    V  d S rp   
isinstancestrr7   js     r0   	<genexpr>z%_multivariate_test.<locals>.<genexpr>=  s,      --az!S!!------r2   z&Contrast matrix L must be a 2-d array!r5   zJContrast matrix L should have the same number of columns as exog! %d != %dc              3   @   K   | ]}t          |t                    V  d S rp   r   r   s     r0   r   z%_multivariate_test.<locals>.<genexpr>H  s,      //As##//////r2   z'Transform matrix M must be a 2-d array!r   zbTransform matrix M should have the same number of rows as the number of columns of endog! %d != %dz&Constant matrix C must be a 2-d array!zCcontrast L and constant C must have the same number of rows! %d!=%dzGtransform M and constant C must have the same number of columns! %d!=%d)stat
contrast_Ltransform_M
constant_Crw   rv   )r   r   anyr
   linear_constraintcoefsr   r   ndarrayr   eyer   zerosaddr   sortr   r   rm   )rz   r{   r|   rG   k_xvark_yvarra   hyponamerq   rr   rs   rw   rv   rY   r&   EHrX   r\   
stat_tables                       r0   ry   ry     s   J __FFG :) :)t99q==GD!AAAYY!^^JD!QAAYY!^^ MD!Q 358YY? @ @ @--1----- 	7:&&88;;AAAa,, KAG0A0A !IJJJwqzV##  "G"#'!*f!5"6 7 7 7 9vAA//Q///// 
	;;''99!<<BDAA}!!RZ00 PCLLA4E4E$%NOOO71:''$ &: '(gaj&%9&: ; ; ; 9!'!*agaj122AAArz** 	GEFFF71:## 6 !
AGAJ78 9 9 9 71:## 9 !
AGAJ78 9 9 9 Bq!QKK1aVAq\\OO b!--..'q!X>>
!+1()a) ) Nr2   c                   0     e Zd ZdZdZd fd	ZddZ xZS )	_MultivariateOLSa  
    Multivariate linear model via least squares


    Parameters
    ----------
    endog : array_like
        Dependent variables. A nobs x k_endog array where nobs is
        the number of observations and k_endog is the number of dependent
        variables
    exog : array_like
        Independent variables. A nobs x k_exog array where nobs is the
        number of observations and k_exog is the number of independent
        variables. An intercept is not included by default and should be added
        by the user (models specified using a formula include an intercept by
        default)

    Attributes
    ----------
    endog : ndarray
        See Parameters.
    exog : ndarray
        See Parameters.
    Nnonec                     t          |j                  dk    s|j        d         dk    rt          d           t                      j        ||f||d| d S )Nr5   zGThere must be more than one dependent variable to fit multivariate OLS!)missinghasconst)r   r   r   super__init__)selfr   r   r   r   kwargs	__class__s         r0   r   z_MultivariateOLS.__init__  s    u{q  EKNa$7$7 9 : : : 	Lg8@	L 	LDJ	L 	L 	L 	L 	Lr2   r   c                 b    t          | j        | j        |          | _        t	          |           S )N)r   )r1   r   r   
_fittedmod_MultivariateOLSResults)r   r   s     r0   fitz_MultivariateOLS.fit  s1    /J	&2 2 2&t,,,r2   )r   N)r   )__name__
__module____qualname____doc___formula_max_endogr   r   __classcell__)r   s   @r0   r   r   n  si         0 L L L L L L- - - - - - - -r2   r   c                   P    e Zd ZdZd Zd Z ee          d	d            Zd Z	dS )
r   z(
    _MultivariateOLS results class
    c                     t          |d          r't          |j        d          r|j        j        | _        nd | _        |j        | _        |j        | _        |j        | _        d S )Ndatadesign_info)hasattrr   r   r{   r|   r   )r   fitted_mv_olss     r0   r   z _MultivariateOLSResults.__init__  se    M6** 	$*M::	$,1=D#D'2(4'2r2   c                 N    |                                                                  S rp   summary__str__r   s    r0   r   z_MultivariateOLSResults.__str__      ||~~%%'''r2   r~   NFc                    t          | j                  }|| j        Y| j        j        }g }|D ]G}|r|dk    rt	          j        |          ||         ddf         }|                    ||dg           HnLg }t          |          D ]:}d|z  }t	          j        d|g          }	d|	|<   |                    ||	dg           ;t          || j
        | j        | j                  }
t          |
| j        | j                  S )aO  
        Linear hypotheses testing

        Parameters
        ----------
        %(hypotheses_doc)s
        skip_intercept_test : bool
            If true, then testing the intercept is skipped, the model is not
            changed.
            Note: If a term has a numerically insignificant effect, then
            an exception because of emtpy arrays may be raised. This can
            happen for the intercept if the data has been demeaned.

        Returns
        -------
        results: _MultivariateOLSResults

        Notes
        -----
        Tests hypotheses of the form

            L * params * M = C

        where `params` is the regression coefficient matrix for the
        linear model y = x * params, `L` is the contrast matrix, `M` is the
        dependent variable transform matrix and C is the constant matrix.
        N	Interceptzx%dr5   )r   r{   r   term_name_slicesr   r   appendranger   r}   r   r|   MultivariateTestResults)r   rz   skip_intercept_testr   termskey
L_contrastr8   r   rq   ra   s              r0   mv_testz_MultivariateOLSResults.mv_test  s7   : T_%%+(9
  ? ?C* !sk/A/A !#c
AAA!>J%%sJ&=>>>>	?  
v 7 7A A;D!V--AAaD%%tQo6666(T_*./4;KM M 'w'+'7'+8 8 	8r2   c                     t           rp   )NotImplementedErrorr   s    r0   r   z_MultivariateOLSResults.summary  s    !!r2   )NF)
r   r   r   r   r   r   r   _hypotheses_docr   r   r6   r2   r0   r   r     sz         3 3 3( ( ( \11138 38 38 2138j" " " " "r2   r   c                   F    e Zd ZdZd Zd Zd Zed             Z	 	 d	dZ	dS )
r   aE  
    Multivariate test results class

    Returned by `mv_test` method of `_MultivariateOLSResults` class

    Parameters
    ----------
    results : dict[str, dict]
        Dictionary containing test results. See the description
        below for the expected format.
    endog_names : sequence[str]
        A list or other sequence of endogenous variables names
    exog_names : sequence[str]
        A list of other sequence of exogenous variables names

    Attributes
    ----------
    results : dict
        Each hypothesis is contained in a single`key`. Each test must
        have the following keys:

        * 'stat' - contains the multivariate test results
        * 'contrast_L' - contains the contrast_L matrix
        * 'transform_M' - contains the transform_M matrix
        * 'constant_C' - contains the constant_C matrix
        * 'H' - contains an intermediate Hypothesis matrix,
          or the between groups sums of squares and cross-products matrix,
          corresponding to the numerator of the univariate F test.
        * 'E' - contains an intermediate Error matrix,
          corresponding to the denominator of the univariate F test.
          The Hypotheses and Error matrices can be used to calculate
          the same test statistics in 'stat', as well as to calculate
          the discriminant function (canonical correlates) from the
          eigenvectors of inv(E)H.

    endog_names : list[str]
        The endogenous names
    exog_names : list[str]
        The exogenous names
    summary_frame : DataFrame
        Returns results as a MultiIndex DataFrame
    c                 d    || _         t          |          | _        t          |          | _        d S rp   )ra   listr|   r{   )r   ra   r|   r{   s       r0   r   z MultivariateTestResults.__init__  s,    ,,z**r2   c                 N    |                                                                  S rp   r   r   s    r0   r   zMultivariateTestResults.__str__  r   r2   c                     | j         |         S rp   )ra   )r   items     r0   __getitem__z#MultivariateTestResults.__getitem__  s    |D!!r2   c                 h   g }| j         D ]\}| j         |         d                                         }||j        dddf<   |                    |                                           ]t          j        |d          }|                    ddg          }|j        	                    ddgd	           |S )
z:
        Return results as a multiindex dataframe
        r   NEffectr   )axisrD   	StatisticT)inplace)
ra   copyrP   r   reset_indexrM   concat	set_indexrD   	set_names)r   dfr   tmps       r0   summary_framez%MultivariateTestResults.summary_frame  s    
 < 	) 	)C,s#F+0022C#&CGAAAxK IIcoo''((((Yr"""\\8W-..
Hk2DAAA	r2   Fc                    t          j                    }|                    d           | j        D ]}|                    ddi           | j        |         d                                         }|                                }t          |j                  }||d<   ||_        g d|_	        |
                    |           |rX|                    |di           t          j        | j        |         d         | j                  }|
                    |           |rX|                    |d	i           t          j        | j        |         d
         | j                  }|
                    |           |rQ|                    |di           t          j        | j        |         d                   }|
                    |           |S )a6  
        Summary of test results

        Parameters
        ----------
        show_contrast_L : bool
            Whether to show contrast_L matrix
        show_transform_M : bool
            Whether to show transform_M matrix
        show_constant_C : bool
            Whether to show the constant_C
        zMultivariate linear model r   r   )r   r   r   r   z contrast L=r   )rC   z transform M=r   )rD   z constant C=r   )r   Summary	add_titlera   add_dictr   r   r   rC   rD   add_dfrM   rN   r{   r|   )r   show_contrast_Lshow_transform_Mshow_constant_Csummr   r   rk   s           r0   r   zMultivariateTestResults.summary'  s    !!2333< 	  	 CMM2r(###c"6*//11B!!BRZ  AAaDBJ'''BHKKOOO  sN3444\$,s"3L"A*./; ; ;B  sO4555\$,s"3M"B(,(8: : :B  sN3444\$,s"3L"ABBBr2   N)FFF)
r   r   r   r   r   r   r   propertyr   r   r6   r2   r0   r   r     s        ) )V+ + +
( ( (" " "   X ?D %' ' ' ' ' 'r2   r   )r   r   )r   )r   numpyr   numpy.linalgr   r   r   r   r   r   scipyr	   pandasrM   patsyr
   statsmodels.compat.pandasr   statsmodels.base.modelr   statsmodels.iolibr   __docformat__r   r1   rm   r}   ry   r   r   r   r6   r2   r0   <module>r      s        D D D D D D D D D D D D D D D D                 2 2 2 2 2 2 ( ( ( ( ( ( & & & & & &%' V?C ?C ?C ?CH 8<t t t tnG G G( _---b b .-bJ%- %- %- %- %-u %- %- %-PH" H" H" H" H" H" H" H"Vm m m m m m m m m mr2   