
    M/Ph|                     (    d dl Z G d d          ZdS )    Nc                   v    e Zd ZdZdZd ZddZd Zd Zd Z	d	 Z
d
 ZddZddZd Zd ZddZddZddZdS )Pcaz
    A basic class for Principal Component Analysis (PCA).

    p is the number of dimensions, while N is the number of data points
    )rgbcymkc                     | j         }|t          j        |d          z
  }|t          j        |d          z  }|| _        || _        d | _        d S )Nr   axis)AnpmeanstdMN_eig)selfr   r   r   s       W/var/www/html/test/jupyter/venv/lib/python3.11/site-packages/statsmodels/sandbox/pca.py__calcz
Pca.__calc   sT    F
BGA1

BF1!
			    Nc           	      6   t          j        |          j        }|j        \  }}||c| _        | _        ||k    rddlm}  |dt                     || _	        |
                                | _        |                                  t          j        | j        t          |dz
  t!          | j                  z            dz             d|         | _        |"t!          |          |k    rt#          d          |dnt%          d |D                       | _        dS )z$
        p X N matrix input
        r   )warnzp > n - intentional?   Nznames must match data dimensionc                 ,    g | ]}t          |          S  )str).0xs     r   
<listcomp>z Pca.__init__.<locals>.<listcomp>(   s    6M6M6M!s1vv6M6M6Mr   )r   arrayTshapenpwarningsr   RuntimeWarningr   copy_origA
_Pca__calctile_colorsintlen
ValueErrortuplenames)r   datar3   r   r&   r'   r   s          r   __init__zPca.__init__   s
    HTNNg!!tvq55%%%%%%D'888FFHHgdl3!S5F5F/F+G+G+IJJ2A2NUq>???"]TT6M6Mu6M6M6M0N0N


r   c                 >    t          j        | j        j                  S )z?
        returns the covariance matrix for the dataset
        )r   covr   r$   r   s    r   getCovarianceMatrixzPca.getCovarianceMatrix+   s     vdfhr   c                    | j         {t          j                            |                                           }t          j        |d                   ddd         }|d         |         |d         dd|f         f}|| _         | j         S )zQ
        returns a tuple of (eigenvalues,eigenvectors) for the data set.
        Nr   r   )r   r   linalgeigr9   argsort)r   ressortis      r   getEigensystemzPca.getEigensystem1   sz     9)-- 8 8 : :;;C*SV$$TTrT*EQs1vaaag/CDIyr   c                 6    |                                  d         S )Nr   rA   r8   s    r   getEigenvalueszPca.getEigenvalues<       ""$$Q''r   c                 6    |                                  d         S )Nr   rC   r8   s    r   getEigenvectorszPca.getEigenvectors?   rE   r   c                 X    |                                  }|t          j        |          z  S )z=
        "energies" are just normalized eigenvectors
        )rD   r   sum)r   vs     r   getEnergieszPca.getEnergiesB   s'     



{r   r   r   Tc           
         ddl m} | j        dd|f         | j        dd|f         }}|r|                                 |                    ||           |                                 \  }}|                                \  }	}
|                                \  }}|
|	z
  ||z
  }}t          ||j	        | j
                  D ]@\  }}}|                    dd|||         z  |||         z  d||z  dz  dz  z  ||           A| j        N|                    d| j        |         z   dz              |                    d| j        |         z   dz              dS dS )	z
        Generates a 2-dimensional plot of the data set and principle components
        using matplotlib.

        ix specifies which p-dimension to put on the x-axis of the plot
        and iy specifies which to put on the y-axis (0-indexed)
        r   Ng?   g      ?)
head_widthfcec$z/\sigma$)matplotlib.pyplotpyplotr   clfscatterrA   xlimylimzipr$   r.   arrowr3   xlabelylabel)r   ixiyrT   pltr!   r	   valsevsxlxuylyudxdyvalvecr   s                     r   plot2dz
Pca.plot2dI   su    	('''''F111R4L"! 	GGIIIAa$$&&Shhjj2hhjj2"ur"u2T#%55 	\ 	\ICAIIa#c"g+c#b'kT2b57S.=PTUYZI[[[[ :!JJs4:b>)+5666JJs4:b>)+566666 "!r      c           	      d   ddl mc m} |r|                                 t	          j        d          }|                                 |                                 z  }|                    |||||         ||         ||         d           |	                    | j
        dd|f         | j
        dd|f         | j
        dd|f         d           | j        rD|                    | j        |         dz   | j        |         dz   | j        |         dz              dS |                                 dS )	z
        Generates a 3-dimensional plot of the data set and principle components
        using mayavi.

        ix, iy, and iz specify which of the input p-dimensions to place on each of
        the x,y,z axes, respectively (0-indexed).
        r   N      )scale_factorg333333?z/sigma)rZ   r[   zlabel)enthought.mayavi.mlabmayavimlabrT   r   zerosrG   rD   quiver3dpoints3dr   r3   axes)r   r\   r]   izrT   r   z3rJ   s           r   plot3dz
Pca.plot3dc   s2    	*)))))))) 	EEGGG8A;;!!$"5"5"7"77	

2bAbE!B%"1
===	

46!!!B$<qqqtTVAAAbD\s
KKK: 	FF$*R.1B9PX\XbceXfgoXoFpppppFFHHHHHr   c                    t          j        |          r'|t          j        | j        j        d                   z  }|t          j        | j        d          z  }| j        j        d         }t          j        t          j        | j                  |k     d          }| j        |         | _        | 	                                 |t          |          z
  S )a   
        clips out all data points that are more than a certain number
        of standard deviations from the mean.

        sigs can be either a single value or a length-p sequence that
        specifies the number of standard deviations along each of the
        p dimensions.
        r   r   r   )r   isscalaronesr   r%   r   allabsr   r,   rI   )r   sigsr&   r
   s       r   sigclipzPca.sigclipw   s     ;t 	/bgdfl1o...DBF46q))))FLOF26$&>>D(a000vayQxr   c                 j    | j                                         | _        |                                  d S N)r+   r*   r   r,   r8   s    r   resetz	Pca.reset   s(    !!##r   c                    t          d |||fD                       }|dk    rt          d          }n|dk    rt          d          ||                                 |k    }nO|t          d|          }n<|+t	          j        |                                           |k     }nt          d          || j        j        }nKt	          j	        |d          }| j        j        j
        d         |j
        d         k    rt          d	          t	          j        |                                           j        |z  }||         j        S )
aD  
        projects the normalized values onto the components

        enthresh, nPCs, and cumen determine how many PCs to use

        if vals is None, the normalized data vectors are the values to project.
        Otherwise, it should be convertable to a p x N array

        returns n,p(>threshold) dimension array
        c                     g | ]}|d uS r   r   )r    es     r   r"   zPca.project.<locals>.<listcomp>   s    GGG!GGGr   r   Nr   z&cannot specify more than one thresholdzShould be unreachableF)r*   zshape for vals does not match)rI   slicer1   energiesr   cumsumRuntimeErrorr   r$   r#   r%   matrixrG   )r   r_   enthreshnPCscumennonnonesr
   projs           r   projectzPca.project   s7    GG$/FGGGHHq==dAA\\EFFF#MMOOh.!$t$$"Idmmoo..%7"#:;;;<68DD8De,,,Dvx~a DJqM11 !@AAAy--//00247Awyr   c                 f   t          j        |          }|j        \  }}| j        j        d         }||k    rt	          d          t           j                            t          j        |                                           j	                  }t          j
        ||f          }||ddd|f<   ||j	        z  }|rt          j        |j	                  j	        S t          j        | j        d          }	t          j        | j        d          }
t          j        |j	                  |
z  |	z   j	        S )zP
        input is an n X q array, where q <= p

        output is p X n
        r   zq > pNr   r   )r   
atleast_2dr%   r   r1   r<   invr   rG   r$   rs   r#   r   r   r   )r   r   normedr&   qr'   evinvzsr   mnssdss              r   	deprojectzPca.deproject   s    -

g!FLOq55W%%%immBId&:&:&<&<==?@@Xqe__111RaR4RTz 	08DF##%%A&&&Ctv1%%%CHTV$$S(,//r   c                    || j         }n7|j        }|j        d         | j         j        d         k    rt          d          |                                 }t          j        |          }|dd|f         |dd|f<   |                     |d          }|j        |z
  }|j        t          j        | j	        d          z  }|t          j
        | j         d          z   S )z
        pc can be a scalar or any sequence of pc indecies

        if vals is None, the source data is self.A, else whatever is in vals
        (which must be p x m)
        Nr   z1vals do not have the correct number of componentsFr   r   )r   r$   r%   r1   r   r   
zeros_liker   r   r   r   )r   pcr_   pcszpcsupcr   Bs           r   
subtractPCzPca.subtractPC   s     <6DD6Dz!}tv|A.. !TUUULLNN]3qqqt9QQQrT
NN4&&F3JCtv1%%%%Q'''''r   r   )r   r   T)r   r   rj   T)NNNN)T)__name__
__module____qualname____doc__r.   r,   r5   r9   rA   rD   rG   rK   ri   ry   r   r   r   r   r   r   r   r   r   r      s        
 *G  O O O O*     	 	 	( ( (( ( (  7 7 7 74   (  $  
! ! ! !F0 0 0 04( ( ( ( ( (r   r   )numpyr   r   r   r   r   <module>r      sM       [( [( [( [( [( [( [( [( [( [(r   