
    M/Ph                        d Z ddlZddlmZ ddlmZ ddlm	Z	  G d d          Z
edk    rdgZdev rd	Zej        ej                            ed
f           ej        edf          f         Zej         ej         ej        d
          d
d           ej        d          ddd         f         j        Z ej        g dg dg dg          Z ej        g dg dg dg          Z ej        ee          Zedej                            ej                  z  z  Z ej        eg d          Zedej                            ej                  z  z   Z e
ee          Z ee                                             ed           e!                    ddd            ee                                            dS dS dS )zQ
Created on Sun Nov 14 08:21:41 2010

Author: josef-pktd
License: BSD (3-clause)
    N)pca)LeaveOneOutc                   4    e Zd ZdZd Zd
dZd ZddZd	 ZdS )FactorModelUnivariatea  

    Todo:
    check treatment of const, make it optional ?
        add hasconst (0 or 1), needed when selecting nfact+hasconst
    options are arguments in calc_factors, should be more public instead
    cross-validation is slow for large number of observations
    c                 j    t          j        |          | _        t          j        |          | _        d S )N)npasarrayendogexog)selfr
   r   s      i/var/www/html/test/jupyter/venv/lib/python3.11/site-packages/statsmodels/sandbox/datarich/factormodels.py__init__zFactorModelUnivariate.__init__   s(    Z&&
Jt$$			    Nr   Tc                     || j         }nt          j        |          }t          ||d          \  }}}}|| _        |r#t          j        |d          | _        d| _        n|| _        d| _        || _	        || _
        dS )zget factor decomposition of exogenous variables

        This uses principal component analysis to obtain the factors. The number
        of factors kept is the maximum that will be considered in the regression.
        N   )keepdim	normalizeT)prependr   )r   r   r	   r   exog_reducedsmadd_constantfactorshasconstevalsevecs)r   xr   addconstxredfactr   r   s           r   calc_factorsz"FactorModelUnivariate.calc_factors    s     9	AA
1A$'7a$H$H$H dE5  	?4>>>DLDMMDLDM



r   c                     t          | d          s|                                  t          j        | j        | j        d d d |dz   f                                                   S )Nfactors_wconstr   )hasattrr    r   OLSr
   r   fit)r   nfacts     r   fit_fixed_nfactz%FactorModelUnivariate.fit_fixed_nfact7   s^    t-.. 	 vdj$,qqq%'z":;;??AAAr   c                 L   t          | d          s|                                  | j        }|| j        j        d         |z
  }||z   dk     rt          d          t          |d          }| j        }g }t          d||z             D ]}| j        ddd|f         }t          j
        ||                                          }	|s|t          t          |                    }d}
|D ]u\  }}t          j
        ||         ||ddf                                                   }|
||         |j                            |j        ||ddf                   z
  dz  z  }
vnt"          j        }
|                    ||	j        |	j        |	j        |
g           t#          j        |          x| _        }t"          j        t#          j        |ddddf         d	          t#          j        |dddf         d	          t#          j        |ddd
f         d	          f         | _        dS )aW  estimate the model and selection criteria for up to maxfact factors

        The selection criteria that are calculated are AIC, BIC, and R2_adj. and
        additionally cross-validation prediction error sum of squares if `skip_crossval`
        is false. Cross-validation is not used by default because it can be
        time consuming to calculate.

        By default the cross-validation method is Leave-one-out on the full dataset.
        A different cross-validation sample can be specified as an argument to
        cv_iter.

        Results are attached in `results_find_nfact`



        r   Nr   zFnothing to do, number of factors (incl. constant) should be at least 1
                  @   r   )r#   r    r   r   shape
ValueErrorminr
   ranger   r$   r%   r   lenmodelpredictparamsr   nanappendaicbicrsquared_adjarrayresults_find_nfactr_argminargmax
best_nfact)r   maxfactskip_crossvalcv_iterr   y0resultskr   resprederr2inidxoutidxres_l1os                 r   fit_find_nfactz$FactorModelUnivariate.fit_find_nfact<   sQ   $ tY'' 	 =?l(+h6GH!! - . . . gr""Zq'(*++ 	N 	NA <"1"%D&T""&&((C ! 
"?)#b''22G%, \ \ME6 fRYU111W>>BBDDGF!(!6!6w~tFSTSTSTH~!V!V"WY[!\ \HH\ 6NNAsw1A8LMMMM,.HW,=,=='%7111QqS5>!!<!<biPQPQPQRSPSUV>W>WYwqqqt}Q//!1 2r   c                    t          | d          s|                                  | j        }d}|dz  }|ddt          | j                  z  z   z  }ddlm} d                    d	          }d
gdgdz  z   }t          |          } |||d|          }|dz  }|dz  }|d|	                                z   z  }|dz  }|dz  }|dz  }|dz  }|S )zprovides a summary for the selection of the number of factors

        Returns
        -------
        sumstr : str
            summary of the results for selecting the number of factors

        r<    z,
Best result for k, by AIC, BIC, R2_adj, L1Oz
                   z%5d %4d %6d %5dr   )SimpleTablezk, AIC, BIC, R2_adj, L1Oz, z%6dz%10.3f   )	data_fmtsN)txt_fmtz"
PCA regression on simulated data,z+
DGP: 2 factors and 4 explanatory variables
z)
Notes: k is number of components of PCA,z&
       constant is added additionallyz-
       k=0 means regression on constant onlyz?
       L1O: sum of squared prediction errors for leave-one-out)
r#   rL   r<   tupler@   statsmodels.iolib.tablerO   splitdict__str__)r   rE   sumstrrO   headers	numformattxt_fmt1tabls           r   summary_find_nfactz(FactorModelUnivariate.summary_find_nfact~   s!    t122 	"!!! )FF 	-"3eDO6L6L"LLL777777,22488Gxjl*	I...{7GT8DDD<<EE$''CC@@GGYYr   )Nr   T)NTN)	__name__
__module____qualname____doc__r   r    r'   rL   r^    r   r   r   r      s{         % % %   .B B B
@2 @2 @2 @2D# # # # #r   r   __main__r   i     )sizerP   r-   )      ?rg   r*   r*   )r*   r*   rg   rg   )      @r+   rg   r*   )皙?rh   rg   r*   )r*   r*   g      ?ri   ri   )rg   rg   rg   zwith cross validation - slowerF)rA   rB   rC   )"rb   numpyr   statsmodels.apiapir   statsmodels.sandbox.toolsr   #statsmodels.sandbox.tools.cross_valr   r   r_   examplesnobsc_randomnormalonesf0repeateyearangeTf2xcoefr;   dotx0r.   ytruerD   modprintr^   rL   rc   r   r   <module>r      s              ) ) ) ) ) ) ; ; ; ; ; ;P P P P P P P Pf zsHH}}U29##$q#22GBGT!H4E4EEF%		&"&))Aa001ddd1CCDF"(0000000002 3 3 "(3333332224 5 5 RVB  
c")"""1111r,,,''S))u{);;;;##B++c$$&&'''.///4udKKKc$$&&'''''-  }r   