
    M/Ph;<                         d Z ddlZddlZddlZddlmZ ddl	m
Z
 d ZddZddZdd
Zd dZd Zd!dZd Zd Zd Zd"dZd Zd Zd Z G d de          Zd#dZd$dZdS )%z
Utility functions models code
    N)_is_using_pandas)
array_likec                     t          | t                    r| S t          | t                    r|                     d          S t          |           S )Nlatin1)
isinstancestrbytesdecode)ss    W/var/www/html/test/jupyter/venv/lib/python3.11/site-packages/statsmodels/tools/tools.pyasstr2r      sH    !S 	Au		 xx!!!1vv    c                 D    i }t          |           D ]\  }}||||z   <   |S )zd
    Helper function to create a dictionary mapping a column number
    to the name in tmp_arr.
    )	enumerate)tmp_arroffsetcol_mapicol_names        r   _make_dictnamesr      s:    
 G )) ' '8&F
Nr      c                    t          j        |           } | j        dk    r| dddf         } |t          j        |          }|j        dk    r|dddf         }t          j        t          j        |                               |           t          j        |                              |                     }| |         ||         fS t          j        |                               |           }| |         S )a  
    Returns views on the arrays Y and X where missing observations are dropped.

    Y : array_like
    X : array_like, optional
    axis : int
        Axis along which to look for missing observations.  Default is 1, ie.,
        observations in rows.

    Returns
    -------
    Y : ndarray
        All Y where the
    X : ndarray

    Notes
    -----
    If either Y or X is 1d, it is reshaped to be 2d.
    r   N)npasarrayndimarraylogical_andisnanany)YXaxiskeepidxs       r   drop_missingr$       s    ( 	
1Av{{aaagJ}HQKK6Q;;!!!T'
A."(1++//$"7"7!7"$(1++//$"7"7!79 9z1W:%%8A;;??4(((zr   Fc                      t          d          )ax  
    Construct a dummy matrix from categorical variables

    .. deprecated:: 0.12

       Use pandas.get_dummies instead.

    Parameters
    ----------
    data : array_like
        An array, Series or DataFrame.  This can be either a 1d vector of
        the categorical variable or a 2d array with the column specifying
        the categorical variable specified by the col argument.
    col : {str, int, None}
        If data is a DataFrame col must in a column of data. If data is a
        Series, col must be either the name of the Series or None. For arrays,
        `col` can be an int that is the (zero-based) column index
        number.  `col` can only be None for a 1d array.  The default is None.
    dictnames : bool, optional
        If True, a dictionary mapping the column number to the categorical
        name is returned.  Used to have information about plain arrays.
    drop : bool
        Whether or not keep the categorical variable in the returned matrix.

    Returns
    -------
    dummy_matrix : array_like
        A matrix of dummy (indicator/binary) float variables for the
        categorical data.
    dictnames :  dict[int, str], optional
        Mapping between column numbers and categorical names.

    Notes
    -----
    This returns a dummy variable for *each* distinct variable.  If a
    a DaataFrame is provided, the names for the new variable is the
    old variable name - underscore - category name.  So if the a variable
    'vote' had answers as 'yes' or 'no' then the returned array would have to
    new variables-- 'vote_yes' and 'vote_no'.  There is currently
    no name checking.

    Examples
    --------
    >>> import numpy as np
    >>> import statsmodels.api as sm

    Univariate examples

    >>> import string
    >>> string_var = [string.ascii_lowercase[0:5],
    ...               string.ascii_lowercase[5:10],
    ...               string.ascii_lowercase[10:15],
    ...               string.ascii_lowercase[15:20],
    ...               string.ascii_lowercase[20:25]]
    >>> string_var *= 5
    >>> string_var = np.asarray(sorted(string_var))
    >>> design = sm.tools.categorical(string_var, drop=True)

    Or for a numerical categorical variable

    >>> instr = np.floor(np.arange(10,60, step=2)/10)
    >>> design = sm.tools.categorical(instr, drop=True)

    With a structured array

    >>> num = np.random.randn(25,2)
    >>> struct_ar = np.zeros((25,1),
    ...                      dtype=[('var1', 'f4'),('var2', 'f4'),
    ...                             ('instrument','f4'),('str_instr','a5')])
    >>> struct_ar['var1'] = num[:,0][:,None]
    >>> struct_ar['var2'] = num[:,1][:,None]
    >>> struct_ar['instrument'] = instr[:,None]
    >>> struct_ar['str_instr'] = string_var[:,None]
    >>> design = sm.tools.categorical(struct_ar, col='instrument', drop=True)

    Or

    >>> design2 = sm.tools.categorical(struct_ar, col='str_instr', drop=True)
    zcategorical has been removed)NotImplementedError)datacol	dictnamesdrops       r   categoricalr+   G   s    ` <
=
==r   Tskipc                    t          | d          rddlm}  || d||          S t          j        |           }|j        }|dk    r|dddf         }n|j        dk    rt          d          t          j        |d	          dk    }|t          j        |d
k    d	          z  }|	                                rz|dk    r|S |dk    rl|dk    rt          d          t          j
        |j        d                   }d                    d ||         D                       }t          d| d          t          j        |j        d                   |g}|r|n
|ddd         }t          j        |          S )aq  
    Add a column of ones to an array.

    Parameters
    ----------
    data : array_like
        A column-ordered design matrix.
    prepend : bool
        If true, the constant is in the first column.  Else the constant is
        appended (last column).
    has_constant : str {'raise', 'add', 'skip'}
        Behavior if ``data`` already has a constant. The default will return
        data without adding another constant. If 'raise', will raise an
        error if any column has a constant value. Using 'add' will add a
        column of 1s if a constant column is present.

    Returns
    -------
    array_like
        The original values with a constant (column of ones) as the first or
        last column. Returned value type depends on input type.

    Notes
    -----
    When the input is a pandas Series or DataFrame, the added column's name
    is 'const'.
    Nr   )	add_trendc)trendprependhas_constantr      z)Only implemented for 2-dimensional arraysr"           r,   raisezdata is constant.,c                 ,    g | ]}t          |          S  )r   ).0r/   s     r   
<listcomp>z add_constant.<locals>.<listcomp>   s     K K KAQ K K Kr   z
Column(s) z are constant.)r   statsmodels.tsa.tsatoolsr.   r   r   r   
ValueErrorptpallr   arangeshapejoinonescolumn_stack)	r'   r1   r2   r.   xr   is_nonzero_constcolumnscolss	            r   add_constantrJ      s   8 d## V666666yS'UUUU 	
4A6DqyyaaagJ	
!DEEEvaa(((A-qCxa0000 	D6!!HW$$qyy !4555)AGAJ//xx K K9I1J K K KLL !Bd!B!B!BCCC			a A!!DDbD'A?1r   c                    t          | dd          } t          |dd          }| j        dk    r| dddf         n| } | j        d         |j        d         k    rt          d|j        d         z            t	          j        | |g          }t          j                            |          t          j                            |          k    rd	S d
S )a6  
    True if (Q, P) contrast `c` is estimable for (N, P) design `d`.

    From an Q x P contrast matrix `C` and an N x P design matrix `D`, checks if
    the contrast `C` is estimable by looking at the rank of ``vstack([C,D])``
    and verifying it is the same as the rank of `D`.

    Parameters
    ----------
    c : array_like
        A contrast matrix with shape (Q, P). If 1 dimensional assume shape is
        (1, P).
    d : array_like
        The design matrix, (N, P).

    Returns
    -------
    bool
        True if the contrast `c` is estimable on design `d`.

    Examples
    --------
    >>> d = np.array([[1, 1, 1, 0, 0, 0],
    ...               [0, 0, 0, 1, 1, 1],
    ...               [1, 1, 1, 1, 1, 1]]).T
    >>> isestimable([1, 0, 0], d)
    False
    >>> isestimable([1, -1, 0], d)
    True
    r/   r3   )maxdimdr   r   NzContrast should have %d columnsFT)r   r   rB   r>   r   vstacklinalgmatrix_rank)r/   rM   news      r   isestimablerS      s    > 	1c!$$$A1c"""Afkk$'

qAwqzQWQZ:QWQZGHHH
)QF

C	yS!!RY%:%:1%=%===u4r   V瞯<c           	         t          j        |           } |                                 } t           j                            | d          \  }}}t          j        |          }|j        d         }|j        d         }|t           j                            |          z  }t          t          ||                    D ]"}	||	         |k    rd||	         z  ||	<   d||	<   #t          j        t          j        |          t          j        |ddt           j        f         t          j        |                              }
|
|fS )z}
    Return the pinv of an array X as well as the singular values
    used in computation.

    Code adapted from numpy.
    Fr   r         ?r5   N)r   r   	conjugaterP   svdcopyrB   maximumreducerangemindot	transposemultiplynewaxis)rF   rcondur   vts_origmncutoffr   ress              r   pinv_extendedrj      s    	
1A	Ay}}Q&&HAq"WQZZF	
A
ARZ&&q)))F3q!99  Q4&==ad7AaDDAaDD
&b!!2;qBJ/?/1|A$@ $@ A AC;r   c                 8   t          j        |           } t          j        | t           j                  }t          j        | j                  }| }||         | j        |         dk    z  ||<   d| j        |         z  |j        |<   t           j        |j        |<   |S )z
    Reciprocal of an array with entries less than or equal to 0 set to 0.

    Parameters
    ----------
    x : array_like
        The input array.

    Returns
    -------
    ndarray
        The array with 0-filled reciprocals.
    dtyper   rV   r   r   
zeros_likefloat64r   flatnan)rF   outnansposs       r   reciprrv     s     	
1A
-
,
,
,C8AFD%C3x16#;?+CH!&+%CHSMVCHTNJr   c                 8   t          j        |           } t          j        | t           j                  }t          j        | j                  }| }||         | j        |         dk    z  ||<   d| j        |         z  |j        |<   t           j        |j        |<   |S )z
    Reciprocal of an array with entries less than 0 set to 0.

    Parameters
    ----------
    x : array_like
        The input array.

    Returns
    -------
    ndarray
        The array with 0-filled reciprocals.
    rl   r   rV   rn   )rF   rs   rt   non_zeros       r   recipr0ry   /  s     	
1A
-
,
,
,C8AFDuH!(+qvh/?1/DEHXqvh//CHXVCHTNJr   c                      t           j                             dz  d          } fdt          j        |          D             }t          j        t          j        |                    S )z
    Erase columns of zeros: can save some time in pseudoinverse.

    Parameters
    ----------
    matrix : ndarray
        The array to clean.

    Returns
    -------
    ndarray
        The cleaned array.
    r3   r   c                 (    g | ]}d d |f         S Nr9   )r:   r   matrixs     r   r;   zclean0.<locals>.<listcomp>V  s%    
8
8
8A6!!!Q$<
8
8
8r   )r   addr[   flatnonzeror   r_   )r}   colsumvals   `  r   clean0r   G  s[     V]]619a((F
8
8
8
8!7!7
8
8
8C8BL%%&&&r   c                    |t           j                            |           }t           j                            | d          \  }}}t          j        |          }|ddd         }g }t          |          D ]'}|                    |dd||         f                    (t          j        t          j        |                    	                    t           j
                  S )a  
    Return an array whose column span is the same as x.

    Parameters
    ----------
    x : ndarray
        The array to adjust, 2d.
    r : int, optional
        The rank of x. If not provided, determined by `np.linalg.matrix_rank`.

    Returns
    -------
    ndarray
        The array adjusted to have full rank.

    Notes
    -----
    If the rank of x is known it can be specified as r -- no check
    is made to ensure that this really is the rank of x.
    NF)full_matricesr<   )r   rP   rQ   rX   argsortr\   appendr   r_   astyperp   )rF   rvrM   rc   ordervaluer   s           r   fullrankr   Z  s    * 	yI!!!$$immAUm33GAq!JqMME$$B$KEE1XX % %Qqqq%({^$$$$:bl5))**11"*===r   c                 T    t          |          }d||<   |                     |          S )aW  
    Unsqueeze a collapsed array.

    Parameters
    ----------
    data : ndarray
        The data to unsqueeze.
    axis : int
        The axis to unsqueeze.
    oldshape : tuple[int]
        The original shape before the squeeze or reduce operation.

    Returns
    -------
    ndarray
        The unsqueezed array.

    Examples
    --------
    >>> from numpy import mean
    >>> from numpy.random import standard_normal
    >>> x = standard_normal((3,4,5))
    >>> m = mean(x, axis=1)
    >>> m.shape
    (3, 5)
    >>> m = unsqueeze(m, 1, x.shape)
    >>> m.shape
    (3, 1, 5)
    >>>
    r   )listreshape)r'   r"   oldshapenewshapes       r   	unsqueezer   {  s*    > H~~HHTN<<!!!r   c                 L   t          j        t          j        |           |dk              }t          j        | dk    t          j        |                    }||z   }t          j        t          j        |           t          j        |                    }t           j        ||<   |S )z
    Returns np.dot(left_matrix, right_matrix) with the convention that
    nan * 0 = 0 and nan * x = nan if x != 0.

    Parameters
    ----------
    A, B : ndarray
    r   )r   r^   r   
nan_to_numrr   )ABshould_be_nan_1should_be_nan_2should_be_nanCs         r   nan_dotr     s     fRXa[[1633Ofa1frx{{33O#o5M 	r}Qq!1!122AvAmHr   c                 $    t          | d|           S )z
    Gets raw results back from wrapped results.

    Can be used in plotting functions or other post-estimation type
    routines.
    _results)getattr)resultss    r   maybe_unwrap_resultsr     s     7J000r   c                   "     e Zd ZdZ fdZ xZS )Buncha  
    Returns a dict-like object with keys accessible via attribute lookup.

    Parameters
    ----------
    *args
        Arguments passed to dict constructor, tuples (key, value).
    **kwargs
        Keyword agument passed to dict constructor, key=value.
    c                 H     t                      j        |i | | | _        d S r|   )super__init____dict__)selfargskwargs	__class__s      r   r   zBunch.__init__  s*    $)&)))r   )__name__
__module____qualname____doc__r   __classcell__)r   s   @r   r   r     sB        	 	        r   r   c                    | | S t          | d          }| j        dk    r|r	| | j        fS | dfS | j        dk    rt          d          |r| j        nd}|r t          j        |           dddf         |fS t          j        |           |fS )a  

    Parameters
    ----------
    x : ndarray, Series, DataFrame or None
        Input to verify dimensions, and to transform as necesary
    ndarray : bool
        Flag indicating whether to always return a NumPy array. Setting False
        will return an pandas DataFrame when the input is a Series or a
        DataFrame.

    Returns
    -------
    out : ndarray, DataFrame or None
        array or DataFrame with 2 dimensiona.  One dimensional arrays are
        returned as nobs by 1. None is returned if x is None.
    names : list of str or None
        list containing variables names when the input is a pandas datatype.
        Returns None if the input is an ndarray.

    Notes
    -----
    Accepts None for simplicity
    Nr3   zx mst be 1 or 2-dimensional.)	r   r   rH   r>   namer   r   pd	DataFrame)rF   ndarray	is_pandasr   s       r   
_ensure_2dr     s    2 	y D))Iv{{ 	ai<d7N	
!7888(166DD %z!}}QQQW%t++|A$$r   qrc                    t          | dd          } |dk    r}| ddt          j        | dk    d          f         } | t          j        | dz                      d                    z  } | j        | z  } t          j                            | |d	          S |d
k    rt          j        	                    | d          \  }t          j
        t          j        |                    }|5|d         | j        d         z  t          j        t                    j        z  }t!          ||k                                              S t          j                            | |          S )a(  
    Matrix rank calculation using QR or SVD

    Parameters
    ----------
    m : array_like
        A 2-d array-like object to test
    tol : float, optional
        The tolerance to use when testing the matrix rank. If not provided
        an appropriate value is selected.
    method : {"ip", "qr", "svd"}
        The method used. "ip" uses the inner-product of a normalized version
        of m and then computes the rank using NumPy's matrix_rank.
        "qr" uses a QR decomposition and is the default. "svd" defers to
        NumPy's matrix_rank.

    Returns
    -------
    int
        The rank of m.

    Notes
    -----
    When using a QR factorization, the rank is determined by the number of
    elements on the leading diagonal of the R matrix that are above tol
    in absolute value.
    rf   r3   rN   ipNr   r4   T)tol	hermitianr   r   )moder   )r   )r   r   r   sqrtsumTrP   rQ   scipyr   absdiagrB   finfofloatepsint)rf   r   methodr   abs_diags        r   rQ   rQ     s,   8 	1c"""A~~aaaQQ''''(aQ(((C!Gy$$QC4$@@@	4\__QS_))6"'!**%%;1+
*RXe__-@@CHsN''))***y$$QC$000r   )r   )Nr   )NFF)Tr,   )rT   r|   )F)Nr   )r   numpyr   pandasr   scipy.linalgr   statsmodels.tools.datar   statsmodels.tools.validationr   r   r   r$   r+   rJ   rS   rj   rv   ry   r   r   r   r   r   dictr   r   rQ   r9   r   r   <module>r      s                3 3 3 3 3 3 3 3 3 3 3 3            NP> P> P> P>h7 7 7 7t' ' 'T   0  0  0' ' '&> > > >B!" !" !"H  .1 1 1    D    (% (% (% (%V)1 )1 )1 )1 )1 )1r   