
    M/Pha                     `    d Z ddlmZ ddlZddlmZ d Zd Zd Z	d Z
dd	Zd
 ZddZddZdS )a  
Working with categorical data
=============================

use of dummy variables, group statistics, within and between statistics
examples for efficient matrix algebra

dummy versions require that the number of unique groups or categories is not too large
group statistics with scipy.ndimage can handle large number of observations and groups
scipy.ndimage stats is missing count

new: np.bincount can also be used for calculating values per label
    )lrangeN)ndimagec                     t          j        t          j        |           dz             }t          j        t	          j        || |                    }||          S )N   labelsindex)nparangemaxarrayr   mean)yxlabelsunique
labelmeanss       j/var/www/html/test/jupyter/venv/lib/python3.11/site-packages/statsmodels/sandbox/regression/try_catdata.pylabelmeanfilterr      sI     9RVAYYq[))L',q,GGGHHJa=    c           
         t          j        t          j        |           dz             }g }g }|j        D ][}t          j        t          j        || |                    }|                    ||                     |                    |           \t          j        t          j        | |d         |d         dz   d| |                    }|t          j        |          t          j        |          j        fS )Nr   r   r   )	r
   r   r   Tr   r   r   append	histogram)r   r   r   labmeansdatalabmeansxxr   
labelcounts           r   labelmeanfilter_ndr   "   s    
 9RVAYYq[))LLHc $ $Xgl2a|LLLMM
JqM***
####'+A|AR@PQR@R!<9 9 9 : :J
 rx))28L+A+A+CCCr   c                     t          j        | dd          \  }}t          j        t          j        ||t          j        t          j        |          dz                                 }||         }|S )NFTreturn_indexreturn_inverser   r   )r
   uniquer   r   r   r   r   )ysr   unilunilinvr   arr3s         r   labelmeanfilter_strr)   7   se     IbuTJJJMD'',q	"&QU,,WX.@Y@YZZZ[[JgDKr   c                 $   t          |           }t          j        | d          \  }}t          j        |          }t          j        ||          d|z  z  }||         }t          j        |||z
  dz            d|z  z  }||         }	|||||	fS )z:uses np.bincount, assumes factors/labels are integers
    r   )r#   )weightsg      ?   )lenr
   r$   bincount)
factorsvaluesnixrindgcountgmeanmeanarr	withinvarwithinvararrs
             r   groupstatsbinr9   ?   s     	GAi222GBt[FKf---F
;EDkGD6'>A*=>>>#f*MIT?L57I|;;r   c                 F   || }n_t          j        |          }|j        dk    r>| j        dk    r3t          j        d | D                       ddt           j        f         }n| }t          j        |dd          \  }}|t          j        t          |                    |fS )zconvert labels based on multiple variables or string labels to unique
    index labels 0,1,2,...,nk-1 where nk is the number of distinct labels
    Nr   r,   c                 L    g | ]!}d |dd                                          z  "S )z@%s@Nr,   )tostring).0iis     r   
<listcomp>z!convertlabels.<locals>.<listcomp>U   s/    GGGbv2A2(9(99GGGr   FTr!   )r
   r   sizendimnewaxisr$   r   r-   )r%   indicesylabelidxr&   r'   s         r   convertlabelsrF   L   s     hw8a<<BGqLLXGGBGGGHH2:VFF FIf5NNNMD'BIc$ii(($..r   c                     t          j        t          j        || |                    }t          j        t          j        || |                    }||fS )z)use ndimage to get fast mean and variancer   )r
   r   r   r   var)r   r   r   r   	labelvarss        r   groupsstats_1drJ   `   sM    ',q,GGGHHJQqEEEFFIy  r   c                    |s| j         dk    r>| j        d         dk    r-t          | t          | j        d                             \  }}}nR|                                 }|                                 }t          j        ||                                 dz             }|j         dk    r|d d t          j	        f         }||k    
                    t                    }|S )Nr,   r   )rA   shaperF   r   copyminr
   r   r   rB   astypeint)r   nonseqycatuniques	unitranslymindummys          r   	cat2dummyrW   f   s     ,!&A++!'!*q..$1!VAGAJ5G5G$H$H gyyvvxxuuww)D++yA~~AAAbjL!W_$$S))ELr   c                 b   |j         dk    r|d d t          j        f         }t          | |          }|                    dt
                    }t          j        |j        |          |z  }t          j        ||j                  }||z
  }t          j        ||z  j        |          |z  }||||fS )Nr   )rQ   r   )dtype)rA   r
   rB   rW   sumfloatdotr   )	r   r   rQ   rV   countgrmeangrmeandata
xdevmeangrvargrs	            r   groupsstats_dummyrb   s   s    v{{aaa
lOa'''Eiii''GVACw&FveFH%%HXJFJ+.66@E5*g--r   )N)r   )__doc__statsmodels.compat.pythonr   numpyr
   scipyr   r   r   r)   r9   rF   rJ   rW   rb    r   r   <module>rh      s     - , , , , ,            D D D*  
< 
< 
</ / / /(! ! !   
. 
. 
. 
. 
. 
.r   