§
    M/Ph  ã                   ó<   — d Z ddlZddlmZ d„ Zd
d„Zd
d„Zdd	„ZdS )z‹Tools for multivariate analysis


Author : Josef Perktold
License : BSD-3


TODO:

- names of functions, currently just "working titles"

é    N)ÚBunchc                 óÊ   — | |}}t           j                             |¦  «                             |¦  «        }|                     |¦  «        }||z
  }t	          |||¬¦  «        }|S )a  helper function to get linear projection or partialling out of variables

    endog variables are projected on exog variables

    Parameters
    ----------
    endog : ndarray
        array of variables where the effect of exog is partialled out.
    exog : ndarray
        array of variables on which the endog variables are projected.

    Returns
    -------
    res : instance of Bunch with

        - params : OLS parameter estimates from projection of endog on exog
        - fittedvalues : predicted values of endog given exog
        - resid : residual of the regression, values of endog with effect of
          exog partialled out

    Notes
    -----
    This is no-frills mainly for internal calculations, no error checking or
    array conversion is performed, at least for now.

    )ÚparamsÚfittedvaluesÚresid)ÚnpÚlinalgÚpinvÚdotr   )ÚendogÚexogÚx1Úx2r   Ú	predictedÚresidualÚress           úd/var/www/html/test/jupyter/venv/lib/python3.11/site-packages/statsmodels/stats/multivariate_tools.pyÚpartial_projectr      si   € ð6 Dˆ€BÝŒY^Š^˜BÑÔ×#Ò# BÑ'Ô'€FØ—’v‘”€IØI‰~€HÝ
vØ&Øð ñ  ô  €Cð €Jó    TFc                 ó`  — |s|r0| |                       d¦  «        z
  } ||                      d¦  «        z
  }|r0| |                      d¦  «        z  } ||                     d¦  «        z  }t          j                             | ¦  «                             |¦  «        }t          j                             |¦  «                             | ¦  «        }|                     |¦  «        }t          j        t          j                             |¦  «        ¦  «        }|                     ¦   «          |ddd…         S )a…  canonical correlation coefficient beween 2 arrays

    Parameters
    ----------
    x1, x2 : ndarrays, 2_D
        two 2-dimensional data arrays, observations in rows, variables in columns
    demean : bool
         If demean is true, then the mean is subtracted from each variable
    standardize : bool
         If standardize is true, then each variable is demeaned and divided by
         its standard deviation. Rescaling does not change the canonical
         correlation coefficients.

    Returns
    -------
    ccorr : ndarray, 1d
        canonical correlation coefficients, sorted from largest to smallest.
        Note, that these are the square root of the eigenvalues.

    Notes
    -----
    This is a helper function for other statistical functions. It only
    calculates the canonical correlation coefficients and does not do a full
    canoncial correlation analysis

    The canonical correlation coefficient is calculated with the generalized
    matrix inverse and does not raise an exception if one of the data arrays
    have less than full column rank.

    See Also
    --------
    cc_ranktest
    cc_stats
    CCA not yet

    r   Néÿÿÿÿ)	ÚmeanÚstdr   r	   r
   r   ÚsqrtÚeigvalsÚsort)r   r   ÚdemeanÚstandardizeÚt1Út2ÚmÚccs           r   Úcancorrr#   <   sô   € ðL ð ð Ø2—7’7˜1‘:”:‰oˆØ2—7’7˜1‘:”:‰oˆàð à
ˆbfŠfQ‰iŒi‰ˆØ
ˆbfŠfQ‰iŒi‰ˆå	ŒŠ˜Ñ	Ô	×	Ò	 Ñ	#Ô	#€BÝ	ŒŠ˜Ñ	Ô	×	Ò	 Ñ	#Ô	#€BØ
Šˆr‰
Œ
€AÝ	Œ•”×"Ò" 1Ñ%Ô%Ñ	&Ô	&€BØ‡G‚GI„I€IØˆdˆdˆdŒ8€Or   c           	      óÀ  — ddl m} | j        \  }}|j        \  }}t          | ||¬¦  «        }	|	|	z  }
|rvt	          j        ||z
  ¦  «        dz   }||
d         z  }||
d         d|
d         z
  z  z  }||j                             ||¦  «        ||	||j                             ||¦  «        fS t	          j        t          ||¦  «        ¦  «        ddd…         }||z
  ||z
  z  }||
ddd…          
                    ¦   «         z  }||
d|
z
  z  ddd…          
                    ¦   «         z  }||j                             ||¦  «        ||	||j                             ||¦  «        fS )aj  rank tests based on smallest canonical correlation coefficients

    Anderson canonical correlations test (LM test) and
    Cragg-Donald test (Wald test)
    Assumes homoskedasticity and independent observations, overrejects if
    there is heteroscedasticity or autocorrelation.

    The Null Hypothesis is that the rank is k - 1, the alternative hypothesis
    is that the rank is at least k.


    Parameters
    ----------
    x1, x2 : ndarrays, 2_D
        two 2-dimensional data arrays, observations in rows, variables in columns
    demean : bool
         If demean is true, then the mean is subtracted from each variable.
    fullrank : bool
         If true, then only the test that the matrix has full rank is returned.
         If false, the test for all possible ranks are returned. However, no
         the p-values are not corrected for the multiplicity of tests.

    Returns
    -------
    value : float
        value of the test statistic
    p-value : float
        p-value for the test Null Hypothesis tha the smallest canonical
        correlation coefficient is zero. based on chi-square distribution
    df : int
        degrees of freedom for thechi-square distribution in the hypothesis test
    ccorr : ndarray, 1d
        All canonical correlation coefficients sorted from largest to smallest.

    Notes
    -----
    Degrees of freedom for the distribution of the test statistic are based on
    number of columns of x1 and x2 and not on their matrix rank.
    (I'm not sure yet what the interpretation of the test is if x1 or x2 are of
    reduced rank.)

    See Also
    --------
    cancorr
    cc_stats

    r   )Ústats©r   é   r   g      ð?N)Úscipyr%   Úshaper#   r   ÚabsÚchi2ÚsfÚarangeÚminÚcumsum)r   r   r   Úfullrankr%   Únobs1Úk1Únobs2Úk2r"   Úcc2ÚdfÚvalueÚw_valueÚrÚvaluesÚw_valuess                    r   Úcc_ranktestr<   s   sx  € ðb ÐÐÐÐÐà”I€Eˆ2Ø”I€Eˆ2å	R Ð	'Ñ	'Ô	'€BØ
ˆr‰'€CØð 
`ÝŒVB˜‘G‰_Œ_˜qÑ ˆØ˜˜Bœ‘ˆØ˜3˜rœ7 b¨3¨r¬7¡lÑ3Ñ4ˆØe”j—m’m E¨2Ñ.Ô.°°B¸ÀÄÇÂÈwÐXZÑA[ÔA[Ð[Ð[åŒI•c˜"˜b‘k”kÑ"Ô" 4 4 R 4Ô(ˆØ1‰f˜˜a™Ñ ˆØ˜˜T˜T˜r˜Tœ×)Ò)Ñ+Ô+Ñ+ˆØ˜C 2¨¡8Ñ,¨d¨d°¨dÔ3×:Ò:Ñ<Ô<Ñ<ˆØu”z—}’} V¨RÑ0Ô0°"°b¸(ÀEÄJÇMÂMÐRZÐ\^ÑD_ÔD_Ð_Ð_r   c                 óà  — | j         \  }}|j         \  }}t          | ||¬¦  «        }|dz  }|d|z
  z  }	||z  }
|||z
  |z
  z  }t          |
|¦  «        }d|
|z
  z  }d||z
  dz
  z  }||
z  }|}|                     ¦   «         }t	          j        dd|	z   z  ¦  «        }|	                     ¦   «         }|	                     ¦   «         }i }||d<   |	|d<   ||d<   ||d<   ||d	<   ||d
<   ||d<   ||d<   |S )a·  MANOVA statistics based on canonical correlation coefficient

    Calculates Pillai's Trace, Wilk's Lambda, Hotelling's Trace and
    Roy's Largest Root.

    Parameters
    ----------
    x1, x2 : ndarrays, 2_D
        two 2-dimensional data arrays, observations in rows, variables in columns
    demean : bool
         If demean is true, then the mean is subtracted from each variable.

    Returns
    -------
    res : dict
        Dictionary containing the test statistics.

    Notes
    -----

    same as `canon` in Stata

    missing: F-statistics and p-values

    TODO: should return a results class instead
    produces nans sometimes, singular, perfect correlation of x1, x2 ?

    r&   é   r'   g      à?z!canonical correlation coefficientÚeigenvalueszPillai's TracezWilk's LambdazHotelling's TracezRoy's Largest RootÚdf_residÚdf_m)r)   r#   r.   Úsumr   ÚproductÚmax)r   r   r   r1   r2   r3   r4   r"   r5   ÚlamÚdf_modelr@   Úsr!   ÚnÚdf1Údf2Úpt_valueÚwl_valueÚht_valueÚrm_valuer   s                         r   Úcc_statsrO   ¸   s>  € ð< ”I€Eˆ2Ø”I€Eˆ2Ý	R Ð	'Ñ	'Ô	'€BØ
ˆa‰%€CØ!c‘'‰?€Cð B‰w€HØU˜R‘Z &Ñ(Ñ)€HÝˆHbÑÔ€AØˆx˜"‰}Ñ€AØˆx˜"‰}˜qÑ Ñ!€Aà
ˆx‰-€CØ
€Cð wŠw‰yŒy€HÝŒz˜!˜q 3™w™-Ñ(Ô(€HØwŠw‰yŒy€HØwŠw‰yŒy€Hð €CØ/1€CÐ+Ñ,Ø€CˆÑØ$€CÐÑØ#€CˆÑØ'€CÐÑØ (€CÐÑØ€Cˆ
OØ€CˆKØ€Jr   )TF)T)	Ú__doc__Únumpyr   Ústatsmodels.tools.toolsr   r   r#   r<   rO   © r   r   ú<module>rT      s   ððð ð  Ð Ð Ð à )Ð )Ð )Ð )Ð )Ð )ð#ð #ð #ðN4ð 4ð 4ð 4ðnB`ð B`ð B`ð B`ðJ>ð >ð >ð >ð >ð >r   