
    M/Ph_                       d Z ddlmZ ddlmZ ddlmZ ddlZddl	m
Z
mZmZmZ ddlmc mZ ddlmZmZ ddlmZ d	 Zd
 Zd Z G d d          Z G d de          Z G d de          Zd Zd ZddZd Z ddZ!dS )zr
Base tools for handling various kinds of data structures, attaching metadata to
results, and doing data cleaning
    )annotations)lmap)reduceN)	DataFrameSeriesisnull
MultiIndex)cache_readonlycache_writable)MissingDataErrorc                    t          j        |           j        dk    r1t          j        |                                           j        dk    rd S d S d S N   )npasarrayndimsqueezexs    U/var/www/html/test/jupyter/venv/lib/python3.11/site-packages/statsmodels/base/data.py_asarray_2dcolumnsr      sL    	z!}}A"*Q--"7"7"9"9">!"C"C "C"C    c                    t          j        |           } | j        dk    r| dddf         } t          j        t	          |           d          dddf         S )zy
    Makes sure input is an array and is 2d. Makes sure output is 2d. True
    indicates a null in the rows of 2d x.
    r   Naxis)r   r   r   anyr   r   s    r   _asarray_2d_null_rowsr      sU     	
1Av{{aaagJ6&))!$$$QQQW--r   c                     t          |           dk    r| dggfz  } d }t          ||                                           S )z
    Returns a boolean array which is True where any of the rows in any
    of the _2d_ arrays in arrs are NaNs. Inputs can be any mixture of Series,
    DataFrames or array_like.
    r   Fc                    t          | d          o| j        t          k    o| }t          j        t          |           |t          |          z            S )Ndtype)hasattrr    boolr   
logical_orr   )r   yx_is_boolean_arrays      r   _nan_row_maybe_two_inputsz,_nan_rows.<locals>._nan_row_maybe_two_inputs.   sU    $Q00JQW_J}215503H3K3KKN N 	Nr   )lenr   r   )arrsr&   s     r   	_nan_rowsr)   %   sT     4yyA~~5'N N N
 +T22::<<<r   c                     e Zd ZdZdZdZd&dZd Zd Zd Z	e
d             Ze
d	             Ze
d
             Zd Z e            d             Z e            d'd            Zed             Zej        d             Zed             Zej        d             Zed             Zd Zd Zd Zd Zd Zd(dZd Zd Zd Z d Z!d  Z"d! Z#d" Z$d# Z%d$ Z&d% Z'dS ))	ModelDatazi
    Class responsible for handling input data and extracting metadata into the
    appropriate form
    Nnonec                
   t          j        |          st          j        |          rddlm} t	          |          d|v r|                    d          | _        d|v r|                    d          | _        |dk    rz | j        |||fi |\  }}|| _	        | j
                            |           | j        | _        | j        | _        |                     | j        | j                  \  | _        | _        nK| j
                            |           || _        || _        |                     ||          \  | _        | _        d | _        d| _        |                     |           |                                  i | _        d S )Nr   )recarray_exceptiondesign_infoformular,   )	data_util_is_recarraystatsmodels.tools.sm_exceptionsr.   NotImplementedErrorpopr/   r0   handle_missingmissing_row_idx__dict__updateendog
orig_endogexog	orig_exog_convert_endog_exog	const_idx
k_constant_handle_constant_check_integrity_cache)	selfr:   r<   missinghasconstkwargsr.   arraysnan_idxs	            r   __init__zModelData.__init__>   s   !%(( 	:I,B4,H,H 	:JJJJJJ%&8999F""%zz-88D!::i00DLf1d1%w < <4:< <OFG#*D M  ((("jDO!YDN$($<$<TZ=AY%H %H!DJ		 M  (((#DO!DN$($<$<UD$I$I!DJ	h'''r   c                J    ddl m }  || j                  }d|v r|d= d|d<   |S )Nr   )copyr/   Trestore_design_info)rL   r8   )rD   rL   ds      r   __getstate__zModelData.__getstate__\   sH    DA- '+A#$r   c                   d|v rddl m}m} g }	 |d         }n1# t          $ r$ |d                             |d                   }Y nw xY wdD ]J}	  ||d         ||d	
          \  }} n6# t
          |f$ r}	|                    |	           Y d }	~	Cd }	~	ww xY w|d         |j        | _        |d= | j        	                    |           d S )NrM   r   )	dmatrices
PatsyErrorframer;   r=   )      r   r      r0   	dataframe)eval_envreturn_type)
patsyrQ   rR   KeyErrorjoin	NameErrorappendr/   r8   r9   )
rD   rN   rQ   rR   excdatadepth_designes
             r   __setstate__zModelData.__setstate__d   s2    A%%33333333C<z < < <++AkN;;< ) 	 	 )	!I,u6A!C !C !CIAvE!:.   JJqMMMDDDD "g%1D'(Qs'    +AAA))B5BBc                @   |du s| j         d| _        d | _        d S d}t          j        | j         d          }t          j        |                                          st          d          t          j        | j         d          }t          j	        ||k              d         
                                }|j        | _        | j        dk    r?| j         d d |f                                         dk    rt          |          | _        nd}n| j        dk    rg }|D ]]}| j         d d |f                                         }|dk    rd| _        t          |          | _         nx|                    |           ^t          j        |          dk    }	|	                                r4d| _        t          ||	                                                   | _        nd}n| j        dk    rd}n	 |r|st          j        t          j        | j         j        d                   | j         f          }
t          j                            |
          }t          j                            | j                   }t          ||k              | _        d | _        d S |r	d| _        d S d S )NFr   r   zexog contains inf or nansr   T)r<   r@   r?   r   maxisfiniteallr   minwherer   sizemeanintr_   arrayr   argmaxcolumn_stackonesshapelinalgmatrix_rank)rD   rF   check_implicitexog_maxexog_minr?   valuesidxvalueposaugmented_exog	rank_augm	rank_origs                r   rA   zModelData._handle_constant}   s   u	 1DO!DNNN #Nvdia000H;x((,,.. D&'BCCCvdia000HX!566q9AACCI'nDO!##9QQQ	\*//11Q66%(^^DNN &*NN1$$ $ . .C Iaaaf-2244Ezz*+),SMM%(((( 8F++q0Cwwyy .*+),Yszz||-D)E)E *.A%%!%  $h $ "$WTY_Q%788$)D"F "FI11.AA	I11$)<<	"%i9&<"="=!% $ #$$ $r   c                    ||         S N clsr   nan_masks      r   
_drop_nanszModelData._drop_nans   s    {r   c                &    ||         d d |f         S r   r   r   s      r   _drop_nans_2dzModelData._drop_nans_2d   s    {111h;''r   c                    g }                     dd          }|d}g }||dgz  }n|	||f}ddg}n|f}dg}|dgz  }d}	g }
t                    rى                                D ]\  }}|t          j        |          dk    r||gz  }&|j        dk    r|t          j        |          fz  }||gz  }P|                                j        dk    r|t          j        |          fz  }||gz  }|j        dk    r|	t          j        |          fz  }	|
|gz  }
t          d	          ||d}|rBt          | }|j	        d         j	        d         k    rt          d
          |          }|z  |	rWt          |	          }|j	        d         j	        d         k    rt          d          |||          z  }n	|          }|z  n"t          | |	rt          dddf         f|	z    t          j
                  st          t          ||                    }|	r0|                    t          t          |
|	                               |r!|                    fd|D                        |0|                    d|i           ||                    d|i           |g fS |dk    rt          d          |dk    r1  fd} fd}t          t          |t          ||                              }|c|1| }                     ||          }|                     ||          }|                    d|i           ||                    d|i           |	r>|                    t          t          |
t          ||	                                         |r!|                    fd|D                        |t          j                   d                                         fS t          d|z            )zu
        This returns a dictionary with keys endog, exog and the keys of
        kwargs. It preserves Nones.
        missing_idxNr   r<   r:   r   r   rT   z6Arrays with more than 2 dimensions are not yet handledzBShape mismatch between endog/exog and extra arrays given to model.zEShape mismatch between endog/exog and extra 2d arrays given to model.c                >    i | ]}|                     |d           S r   get.0krG   s     r   
<dictcomp>z,ModelData.handle_missing.<locals>.<dictcomp>  9     !< !< !<%& "#FJJq$$7$7 !< !< !<r   raisez!NaNs were encountered in the datadropc                0                         |           S r   )r   r   r   r   s    r   <lambda>z*ModelData.handle_missing.<locals>.<lambda>"  s    #..H"="= r   c                0                         |           S r   )r   r   s    r   r   z*ModelData.handle_missing.<locals>.<lambda>#  s    S%6%6q(%C%C r   c                >    i | ]}|                     |d           S r   r   r   s     r   r   z,ModelData.handle_missing.<locals>.<dictcomp>6  r   r   z missing option %s not understood)r5   r'   itemsr   r   r   r   
ValueErrorr)   rt   r   dictzipr9   r   r   r   rl   tolist)r   r:   r<   rE   rG   none_array_namesr   combinedcombined_namescombined_2dcombined_2d_nameskeyvalue_arrayupdated_row_maskcombined_nanscombined_2d_nans	drop_nansdrop_nans_2dr   s   `   `             @r   r6   zModelData.handle_missing   sY     jj55"HN| VH, t}H%v.NNxH%YN( v;; 	<$*LLNN < < [&"'+*>*>!*C*C$-$#q((K!8!8 ::H"se+NN ((**/144K!8!8 ::H"se+NN !%**BJ{$;$;#==K%#.%%$ &; < < < ""H# * )8 4 &q)X^A->>>$ &H I I I $1(#; M) 	-#,[#9#9 #)!,q0AAA$ &K L L L#/$(8((CC$$'7	'B$,, !(+H J$x4'8&:[&HIvh -	KC99::H KS):K%H%H I IJJJ = !< !< !< !<*:!< !< !< = = = &% 0111#OOVTN333R<"#FGGG yH=====ICCCCCLCY0I0IJJKKH&#/(8'8$NN52BCCE'"~~d4DEE% 0111#OOVTN333 LS):)-lK)H)H&J &J !K !K L L L = !< !< !< !<*:!< !< !< = = = RXxi003::<<<<?'IJJJr   c                    |                      |          }d }|F|                     |          }|j        dk    r|d d d f         }|j        dk    rt          d          ||fS )Nr   rT   zexog is not 1d or 2d)	_get_yarr	_get_xarrr   r   )rD   r:   r<   yarrxarrs        r   r>   zModelData._convert_endog_exog=  sq     ~~e$$>>$''DyA~~AAAtG}yA~~ !7888Tzr   c                    | j         }|                     |          }|st          | j                  }t	          |          dk    r|d         S t          |          S )Nr   r   )r;   
_get_names_make_endog_namesr:   r'   list)rD   r:   ynamess      r   r   zModelData.ynamesK  sX    '' 	3&tz22Fv;;!!9<<r   returnlist[str] | Nonec                    | j         }|:|                     |          }|st          | j                  }t	          |          S d S r   )r=   r   _make_exog_namesr<   r   )rD   r<   xnamess      r   r   zModelData.xnamesW  sH    ~__T**F 5)$)44<<tr   c                    | j         p| j        S r   )_param_namesr   rD   s    r   param_nameszModelData.param_namesa  s      /DK/r   c                    || _         d S r   )r   )rD   rz   s     r   r   zModelData.param_namesf  s    "r   c                ,    | j         | j         S | j        S )z
        Labels for covariance matrices

        In multidimensional models, each dimension of a covariance matrix
        differs from the number of param_names.

        If not set, returns param_names
        )
_cov_namesr   r   s    r   	cov_nameszModelData.cov_namesj  s     ?&?"r   c                    || _         d S r   )r   )rD   r|   s     r   r   zModelData.cov_namesy  s      r   c                |    | j         }||                     |          }n| j        }|                     |          }|S r   )r=   _get_row_labelsr;   )rD   r<   
row_labelsr:   s       r   r   zModelData.row_labels~  sD    ~--d33JJOE--e44Jr   c                    d S r   r   rD   arrs     r   r   zModelData._get_row_labels  s    tr   c                4   t          |t                    r?t          |j        t                    rd |j        D             S t	          |j                  S t          |t
                    r|j        r|j        gS d S 	 |j        j        S # t          $ r Y nw xY wd S )Nc                L    g | ]!}d                      d |D                       "S )rc   c              3     K   | ]}||V  	d S r   r   )r   levels     r   	<genexpr>z2ModelData._get_names.<locals>.<listcomp>.<genexpr>  s'       ? ?5 ? ? ? ? ? ? ?r   )r]   )r   cs     r   
<listcomp>z(ModelData._get_names.<locals>.<listcomp>  sC     . . .  ? ?A ? ? ??? . . .r   )

isinstancer   columnsr	   r   r   namer    namesAttributeErrorr   s     r   r   zModelData._get_names  s    c9%% 	#+z22 ). .!$. . . . CK(((V$$ 		x z!y&!    ts   <B 
BBc                F   t          j        |          rt          j        |          }t          j        |          }t          |          dk    r?|j        dk    r|S |j        dk    r't          j        |                                g          S |                                S r   )r1   _is_structured_ndarraystruct_to_ndarrayr   r   r'   r   r   )rD   r:   s     r   r   zModelData._get_yarr  s    +E22 	7/66E
5!!u::??zQaz5==??"3444}}r   c                z    t          j        |          rt          j        |          }t          j        |          S r   )r1   r   r   r   r   )rD   r<   s     r   r   zModelData._get_xarr  s5    +D11 	5.t44Dz$r   c                    | j         9t          | j                   t          | j                  k    rt          d          d S d S )Nz+endog and exog matrices are different sizes)r<   r'   r:   r   r   s    r   rB   zModelData._check_integrity  sB    9 49~~TZ00 !NOOO ! 00r   r   c                &   |dk    r|                      |          S |dk    r|                     |          S |dk    r|                     |          S |dk    r|                     |          S |dk    r|                     |          S |dk    r|                     |          S |dk    r|                     ||          S |dk    r|                     ||          S |d	k    r|                     |          S |d
k    r| 	                    |          S |S )Nr   rowscovdates
columns_eqcov_eqgeneric_columnsgeneric_columns_2dr   multivariate_confint)
attach_columnsattach_rows
attach_covattach_datesattach_columns_eqattach_cov_eqattach_generic_columnsattach_generic_columns_2dattach_ynamesattach_mv_confint)rD   objhowr   s       r   wrap_outputzModelData.wrap_output  s8   )&&s+++F]]##C(((E\\??3'''G^^$$S)))L  ))#...H__%%c***%%%..sE:::(((11#u===H__%%c******))#...Jr   c                    |S r   r   rD   results     r   r   zModelData.attach_columns      r   c                    |S r   r   r   s     r   r   zModelData.attach_columns_eq  r   r   c                    |S r   r   r   s     r   r   zModelData.attach_cov  r   r   c                    |S r   r   r   s     r   r   zModelData.attach_cov_eq  r   r   c                    |S r   r   r   s     r   r   zModelData.attach_rows  r   r   c                    |S r   r   r   s     r   r   zModelData.attach_dates  r   r   c                    |S r   r   r   s     r   r   zModelData.attach_mv_confint  r   r   c                    |S r   r   rD   r   argsrG   s       r   r   z ModelData.attach_generic_columns  r   r   c                    |S r   r   r   s       r   r   z#ModelData.attach_generic_columns_2d  r   r   c                    |S r   r   r   s     r   r   zModelData.attach_ynames  r   r   )Nr,   N)r   r   )r   N)(__name__
__module____qualname____doc__r   r   rJ   rO   rf   rA   classmethodr   r   r6   r>   r   r   r   propertyr   setterr   r
   r   r   r   r   r   rB   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r+   r+   6   s         LJ   <       2;$ ;$ ;$z   [ ( ( [( xK xK [xKt   ^	  	  	  ^    0 0 X0 # # #     X          ^    *
 
 
     
P P P
   0                      r   r+   c                      e Zd Zd ZdS )	PatsyDatac                    |j         j        S r   )r/   column_namesr   s     r   r   zPatsyData._get_names  s    ++r   N)r   r  r  r   r   r   r   r  r    s#        , , , , ,r   r  c                       e Zd ZdZd fd	Ze fd            Ze fd            Z fdZd Z	d Z
dd	Zd
 Zd Zd Zd Zd Zd Zd Zd Z xZS )
PandasDataz^
    Data handling class which knows how to reattach pandas metadata to model
    results
    Nc                   t          j        |          }||nt          j        |          }|j        t          k    s||j        t          k    rt	          d          t                                          ||          S )NzRPandas data cast to numpy dtype of object. Check input data with np.asarray(data).)r   r   r    objectr   superr>   rD   r:   r<   	__class__s      r   r>   zPandasData._convert_endog_exog  s|    
5!!|ttD)9)9;&  D$4v9M9M G H H Hww**5$777r   c                    t          |t          t          f          r|j        |         S t	                                          ||          S r   )r   r   r   locr  r   r   r   r   r  s      r   r   zPandasData._drop_nans  s@    a&),-- 	35?"77%%a222r   c                    t          |t          t          f          r|j        |         j        d d |f         S t	                                          ||          S r   )r   r   r   r  r  r   r  s      r   r   zPandasData._drop_nans_2d  sP    a&),-- 	65?&qqq({3377((H555r   c                   | j         | j        }}|Xt          |d          rHt          |d          r8| j         j                            | j        j                  st          d          t                                                       d S )Nindexz.The indices for endog and exog are not aligned)r;   r=   r!   r  equalsr   r  rB   r  s      r   rB   zPandasData._check_integrity  s    ot~t(( -4T7-C-C O)001EFF  MNNN  """""r   c                J    	 |j         S # t          $ r | j        j         cY S w xY wr   )r  r   r;   r   s     r   r   zPandasData._get_row_labels  s=    	)9 	) 	) 	) ?((((	)s   	 ""c                F    t          | |d           }t          ||          S )Nr  )getattrr   )rD   r   r   r
  s       r   r   z!PandasData.attach_generic_columns   s&    tUD11fL1111r   c                r    |p|}t          | |d           }t          | |d           }t          |||          S Nr  r   )r  r   )rD   r   rownamescolnamess       r   r   z$PandasData.attach_generic_columns_2d%  sC    'x44004400xBBBBr   c                p    |j         dk    rt          || j                  S t          || j                  S )Nr   r  )r   r   r   r   r   s     r   r   zPandasData.attach_columns+  s=     ;!&(89999V4+;<<<<r   c                :    t          || j        | j                  S r  )r   r   r   r   s     r   r   zPandasData.attach_columns_eq4      t{DKHHHHr   c                :    t          || j        | j                  S r  )r   r   r   s     r   r   zPandasData.attach_cov7  s    t~t~NNNNr   c                :    t          || j        | j                  S r  )r   r   r   s     r   r   zPandasData.attach_cov_eq:  r$  r   c                f   |                                 }t          j        | j        d          j        d         }|dk    r|j        |fk    r|d d d f         }|j        dk     rt          |          }nt          |          }| j        |_        | j	        t          |           d          |_        |S )Nr   ndminr   rT   )r   r   rp   r   rt   r   r   r   r   r   r'   r  )rD   r   squeezedk_endogouts        r   r   zPandasData.attach_rows=  s     >>##(4;a0006q9Q;;8>gZ77aaa(H=1""CCF##C+CKOS[[LMM2	
r   c                t   |                                 }t          j        | j        d          j        d         }|dk    r*|j        |fk    rt          j        |          d d d f         }|j        dk     rt          || j                  S t          t          j        |          | j        | j                  S )Nr   r(  r   rT   r  r  )
r   r   rp   r   rt   r   r   r   predict_datesr   )rD   r   r*  r+  s       r   r   zPandasData.attach_datesM  s    >>##(4;a0006q9Q;;8>gZ77z(++D!!!G4H=1($*<====RZ//#'#5%)[2 2 2 2r   c                Z    t          |                    d          | j        ddg          S )N)rZ   rT   lowerupperr  )r   reshaper   r   s     r   r   zPandasData.attach_mv_confintZ  s4    00#~")7!35 5 5 	5r   c                    |                                 }|j        dk     rt          || j                  S t	          || j                  S )NrT   )r   )r   )r   r   r   r   r   )rD   r   r*  s      r   r   zPandasData.attach_ynames_  sH    >>##=1(5555VT[9999r   r   )r   r  r  r  r>   r  r   r   rB   r   r   r   r   r   r   r   r   r   r   r   __classcell__)r  s   @r   r  r    sa        
8 8 8 8 8 8 3 3 3 3 [3 6 6 6 6 [6# # # # #) ) )2 2 2
C C C C= = =I I IO O OI I I   2 2 25 5 5
: : : : : : :r   r  c                    | j         dk    s| j        d         dk    rdg}n$d t          | j        d                   D             }|S )Nr   r$   c                    g | ]
}d |dz   z  S )zy%dr   r   r   is     r   r   z%_make_endog_names.<locals>.<listcomp>l  s     ???A%1Q3-???r   )r   rt   range)r:   r   s     r   r   r   h  sL    zQ%+a.A--??u{1~)>)>???Mr   c                P   |                      d          }|dk                                    rP|                                }d t          d| j        d                   D             }|                    |d           n(d t          d| j        d         dz             D             }|S )Nr   c                    g | ]}d |z  S zx%dr   r7  s     r   r   z$_make_exog_names.<locals>.<listcomp>w  s    AAAAeaiAAAr   r   constc                    g | ]}d |z  S r<  r   r7  s     r   r   z$_make_exog_names.<locals>.<listcomp>z  s    CCCAeaiCCCr   )varr   argminr9  rt   insert)r<   exog_varr?   
exog_namess       r   r   r   q  s    xx{{HA D OO%%	AAq$*Q-)@)@AAA
)W----CCq$*Q-/)B)BCCC
r   r,   c                    t          | |          }|dk    r*t          | |          }|                    |           |d fS  |j        | |fd|i|S )Nr,   )r:   r<   rE   )handle_data_class_factoryr   r9   r6   )r:   r<   rE   rG   klassret_dicts         r   r6   r6     sl    %eT22E&e$///~5tGGWGGGGr   c                L   t          j        | |          rt          }nt          j        | |          rt          }nit          j        | |          rt          }nLt          j        | |          rt          }n/t          dt          |           dt          |                    |S )z
    Given inputs
    zunrecognized data structures: z / )
r1   _is_using_ndarray_typer+   _is_using_pandasr  _is_using_patsyr  _is_using_ndarrayr   type)r:   r<   rF  s      r   rE  rE    s     't44 4		#E4	0	0 	4		"5$	/	/ 4		$UD	1	1 4ju++++tDzzz3 4 4 	4Lr   c                    t          | t          t          f          rt          j        |           } t          |t          t          f          rt          j        |          }t          | |          } || f|||d|S )N)r<   rE   rF   )r   r   tupler   r   rE  )r:   r<   rE   rF   rG   rF  s         r   handle_datarP    s    %$'' "
5!!$u&&  z$%eT22E5 T7X    r   )Nr,   )r,   N)"r  
__future__r   statsmodels.compat.pythonr   	functoolsr   numpyr   pandasr   r   r   r	   statsmodels.tools.datatoolsra   r1   statsmodels.tools.decoratorsr
   r   r3   r   r   r   r)   r+   r  r  r   r   r6   rE  rP  r   r   r   <module>rY     s    # " " " " " * * * * * *           8 8 8 8 8 8 8 8 8 8 8 8 * * * * * * * * * G G G G G G G G < < < < < <  

. 
. 
.= = ="t t t t t t t tn, , , , ,	 , , ,
s: s: s: s: s: s: s: s:l    H H H H  &	 	 	 	 	 	r   