
"""
Information Theoretic and Entropy Measures

References
----------
Golan, A. 2008. "Information and Entropy Econometrics -- A Review and
    Synthesis." Foundations and Trends in Econometrics 2(1-2), 1-145.

Golan, A., Judge, G., and Miller, D.  1996.  Maximum Entropy Econometrics.
    Wiley & Sons, Chichester.
"""

from statsmodels.compat.python import lzip, lmap
from scipy import stats
import numpy as np
from matplotlib import pyplot as plt
from scipy.special import logsumexp as sp_logsumexp


def logsumexp(a, axis=None):
    """
    Compute the log of the sum of exponentials log(e^{a_1}+...e^{a_n}) of a

    Avoids numerical overflow.

    Parameters
    ----------
    a : array_like
        The vector to exponentiate and sum
    axis : int, optional
        The axis along which to apply the operation.  Default is None.

    Returns
    -------
    log(sum(exp(a)))
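
    Examples
    --------
    A minimal illustrative sketch (the value in the comment is approximate):

    >>> import numpy as np
    >>> a = np.array([1000., 1000.])
    >>> lse = logsumexp(a)  # about 1000.6931; np.log(np.exp(a).sum()) overflows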

    Notes
    -----
    This function was taken from the mailing list
    http://mail.scipy.org/pipermail/scipy-user/2009-October/022931.html

    This should be superseded by the ufunc when it is finished.
    """
    if axis is None:
        # scipy.special.logsumexp already does the overflow-safe reduction
        return sp_logsumexp(a)
    a = np.asarray(a)
    shp = list(a.shape)
    shp[axis] = 1
    # subtract the maximum along `axis` before exponentiating to avoid overflow
    a_max = a.max(axis=axis)
    s = np.log(np.exp(a - a_max.reshape(shp)).sum(axis=axis))
    lse = a_max + s
    return lse


def _isproperdist(X):
    """
    Checks to see if `X` is a proper probability distribution
    """
    X = np.asarray(X)
    # every element must lie in [0, 1] and the total mass must be 1
    if not np.allclose(np.sum(X), 1) or not np.all(X >= 0) or not np.all(X <= 1):
        return False
    else:
        return True


def discretize(X, method="ef", nbins=None):
    """
    Discretize `X`

    Parameters
    ----------
    bins : int, optional
        Number of bins.  Default is floor(sqrt(N))
    method : str
        "ef" is equal-frequency binning
        "ew" is equal-width binning

    Examples
    --------
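    A minimal, illustrative sketch (hypothetical data; output not shown):

    >>> import numpy as np
    >>> x = np.arange(10.)
    >>> d_ef = discretize(x)                        # default: floor(sqrt(10)) = 3 bins
    >>> d_ew = discretize(x, method="ew", nbins=5)  # equal-width variant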
    """
    nobs = len(X)
    if nbins is None:
        nbins = np.floor(np.sqrt(nobs))
    if method == "ef":
        # equal-frequency bins assigned via the ranks of the observations
        discrete = np.ceil(nbins * stats.rankdata(X) / nobs)
    if method == "ew":
        # equal-width bins spanning the range of X
        width = np.max(X) - np.min(X)
        width = np.ceil(width / nbins)
        ivec = np.argsort(X)          # replaces the removed stats.fastsort
        svec = np.asarray(X)[ivec]
        discrete = np.zeros(nobs)
        binnum = 1
        base = svec[0]
        discrete[ivec[0]] = binnum
        for i in range(1, nobs):
            if svec[i] < base + width:
                discrete[ivec[i]] = binnum
            else:
                base = svec[i]
                binnum += 1
                discrete[ivec[i]] = binnum
    return discrete


def logbasechange(a, b):
    """
    There is a one-to-one transformation of the entropy value from
    a log base b to a log base a :

    H_{b}(X)=log_{b}(a)[H_{a}(X)]

    Returns
    -------
    log_{b}(a)
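
    Examples
    --------
    A hedged sketch of the conversion direction (value in the comment is
    approximate):

    >>> import numpy as np
    >>> h_bits = 1.0                              # entropy of a fair coin, in bits
    >>> h_nats = logbasechange(2, np.e) * h_bits  # about 0.6931 nats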
    """
    # log_{b}(a), written with natural logs
    return np.log(a) / np.log(b)


def natstobits(X):
    """
    Converts from nats to bits
    """
    return logbasechange(np.e, 2) * X


def bitstonats(X):
    """
    Converts from bits to nats
    """
    return logbasechange(2, np.e) * X


def shannonentropy(px, logbase=2):
    """
    This is Shannon's entropy

    Parameters
    ----------
    logbase : int or np.e, optional
        The base of the log
    px : 1d or 2d array_like
        Can be a discrete probability distribution, a 2d joint distribution,
        or a sequence of probabilities.

    Returns
    -----
    For log base 2 (bits) given a discrete distribution
        H(p) = sum(px * log2(1/px)) = -sum(px * log2(px)) = E[log2(1/p(X))]

    For log base 2 (bits) given a joint distribution
        H(px,py) = -sum_{k,j}*w_{kj}log2(w_{kj})
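
    Examples
    --------
    Small illustrative sketch; the values in the comments follow from the
    formulas above:

    >>> import numpy as np
    >>> h2 = shannonentropy([0.5, 0.5])                     # 1 bit
    >>> h4 = shannonentropy([0.25] * 4)                     # 2 bits
    >>> h4_nats = shannonentropy([0.25] * 4, logbase=np.e)  # 2*ln(2) nats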

    Notes
    -----
    shannonentropy(0) is defined as 0
    """
    px = np.asarray(px)
    if not np.all(px <= 1) or not np.all(px >= 0):
        raise ValueError("px does not define proper distribution")
    # nan_to_num treats the 0 * log2(0) terms as 0
    entropy = -np.sum(np.nan_to_num(px * np.log2(px)))
    if logbase != 2:
        return logbasechange(2, logbase) * entropy
    else:
        return entropy


def shannoninfo(px, logbase=2):
    """
    Shannon's information

    Parameters
    ----------
    px : float or array_like
        `px` is a discrete probability distribution

    Returns
    -------
    For logbase = 2
    np.log2(px)
    """
    px = np.asarray(px)
    if not np.all(px <= 1) or not np.all(px >= 0):
        raise ValueError("px does not define proper distribution")
    if logbase != 2:
        return -logbasechange(2, logbase) * np.log2(px)
    else:
        return -np.log2(px)


def condentropy(px, py, pxpy=None, logbase=2):
    """
    Return the conditional entropy of X given Y.

    Parameters
    ----------
    px : array_like
    py : array_like
    pxpy : array_like, optional
        If pxpy is None, the distributions are assumed to be independent
        and condentropy(px,py) = shannonentropy(px)
    logbase : int or np.e

    Returns
    -------
    sum_{kj} w_{kj} log(q_{j}/w_{kj})

    where q_{j} = Y[j]
    and w_kj = X[k,j]
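
    Examples
    --------
    Hedged sketch with a small hypothetical joint distribution ``w`` and its
    marginals:

    >>> import numpy as np
    >>> w = np.array([[0.25, 0.25], [0.25, 0.25]])  # independent and uniform
    >>> px, py = w.sum(0), w.sum(1)
    >>> h = condentropy(px, py, w)  # equals shannonentropy(px) = 1 bit here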
    """
    if not _isproperdist(px) or not _isproperdist(py):
        raise ValueError("px or py is not a proper probability distribution")
    if pxpy is not None and not _isproperdist(pxpy):
        raise ValueError("pxpy is not a proper joint distribution")
    if pxpy is None:
        # under independence the joint is the outer product of the marginals
        pxpy = np.outer(py, px)
    condent = np.sum(pxpy * np.nan_to_num(np.log2(py / pxpy)))
    if logbase == 2:
        return condent
    else:
        return logbasechange(2, logbase) * condent


def mutualinfo(px, py, pxpy, logbase=2):
    """
    Returns the mutual information between X and Y.

    Parameters
    ----------
    px : array_like
        Discrete probability distribution of random variable X
    py : array_like
        Discrete probability distribution of random variable Y
    pxpy : 2d array_like
        The joint probability distribution of random variables X and Y.
        Note that if X and Y are independent then the mutual information
        is zero.
    logbase : int or np.e, optional
        Default is 2 (bits)

    Returns
    -------
    shannonentropy(px) - condentropy(px,py,pxpy)
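
    Examples
    --------
    Hedged sketch: for an independent joint distribution the mutual
    information is (numerically) zero:

    >>> import numpy as np
    >>> px = np.array([0.5, 0.5])
    >>> py = np.array([0.5, 0.5])
    >>> mi = mutualinfo(px, py, np.outer(py, px))  # 0 bits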
    """
    if not _isproperdist(px) or not _isproperdist(py):
        raise ValueError("px or py is not a proper probability distribution")
    if pxpy is not None and not _isproperdist(pxpy):
        raise ValueError("pxpy is not a proper joint distribution")
    if pxpy is None:
        pxpy = np.outer(py, px)
    return (shannonentropy(px, logbase=logbase)
            - condentropy(px, py, pxpy, logbase=logbase))


def corrent(px, py, pxpy, logbase=2):
    """
    An information theoretic correlation measure.

    Reflects linear and nonlinear correlation between two random variables
    X and Y, characterized by the discrete probability distributions px and py
    respectively.

    Parameters
    ----------
    px : array_like
        Discrete probability distribution of random variable X
    py : array_like
        Discrete probability distribution of random variable Y
    pxpy : 2d array_like, optional
        Joint probability distribution of X and Y.  If pxpy is None, X and Y
        are assumed to be independent.
    logbase : int or np.e, optional
        Default is 2 (bits)

    Returns
    -------
    mutualinfo(px,py,pxpy,logbase=logbase)/shannonentropy(py,logbase=logbase)
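
    Examples
    --------
    Hedged sketch: a joint distribution in which Y determines X gives 1,
    while an independent joint gives 0:

    >>> import numpy as np
    >>> w = np.array([[0.5, 0.0], [0.0, 0.5]])  # X == Y with probability 1
    >>> px, py = w.sum(0), w.sum(1)
    >>> r = corrent(px, py, w)  # 1.0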

    Notes
    -----
    This is also equivalent to

    corrent(px,py,pxpy) = 1 - condentropy(px,py,pxpy)/shannonentropy(py)
    """
    if not _isproperdist(px) or not _isproperdist(py):
        raise ValueError("px or py is not a proper probability distribution")
    if pxpy is not None and not _isproperdist(pxpy):
        raise ValueError("pxpy is not a proper joint distribution")
    if pxpy is None:
        pxpy = np.outer(py, px)
    return (mutualinfo(px, py, pxpy, logbase=logbase)
            / shannonentropy(py, logbase=logbase))


def covent(px, py, pxpy, logbase=2):
    """
    An information theoretic covariance measure.

    Reflects linear and nonlinear correlation between two random variables
    X and Y, characterized by the discrete probability distributions px and py
    respectively.

    Parameters
    ----------
    px : array_like
        Discrete probability distribution of random variable X
    py : array_like
        Discrete probability distribution of random variable Y
    pxpy : 2d array_like, optional
        Joint probability distribution of X and Y.  If pxpy is None, X and Y
        are assumed to be independent.
    logbase : int or np.e, optional
        Default is 2 (bits)

    Returns
    -------
    condentropy(px,py,pxpy,logbase=logbase) + condentropy(py,px,pxpy,
            logbase=logbase)

    Notes
    -----
    This is also equivalent to

    covent(px,py,pxpy) = condentropy(px,py,pxpy) + condentropy(py,px,pxpy)
    """
    if not _isproperdist(px) or not _isproperdist(py):
        raise ValueError("px or py is not a proper probability distribution")
    if pxpy is not None and not _isproperdist(pxpy):
        raise ValueError("pxpy is not a proper joint distribution")
    if pxpy is None:
        pxpy = np.outer(py, px)
    return (condentropy(px, py, pxpy, logbase=logbase)
            + condentropy(py, px, pxpy, logbase=logbase))


# #### Generalized Entropies ####

def renyientropy(px, alpha=1, logbase=2, measure='R'):
    """
    Renyi's generalized entropy

    Parameters
    ----------
    px : array_like
        Discrete probability distribution of random variable X.  Note that
        px is assumed to be a proper probability distribution.
    logbase : int or np.e, optional
        Default is 2 (bits)
    alpha : float or inf
        The order of the entropy.  The default is 1, which in the limit
        is just Shannon's entropy.  2 is Renyi (Collision) entropy.  If
        the string "inf" or numpy.inf is specified the min-entropy is returned.
    measure : str, optional
        The type of entropy measure desired.  'R' returns Renyi entropy
        measure.  'T' returns the Tsallis entropy measure.

    Returns
    -------
    1/(1-alpha)*log(sum(px**alpha))

    In the limit as alpha -> 1, Shannon's entropy is returned.

    In the limit as alpha -> inf, min-entropy is returned.
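
    Examples
    --------
    Hedged sketch for a skewed distribution; the comments give the values the
    formula implies:

    >>> import numpy as np
    >>> px = np.array([0.5, 0.25, 0.25])
    >>> h1 = renyientropy(px, alpha=1)         # Shannon entropy, 1.5 bits
    >>> h2 = renyientropy(px, alpha=2)         # collision entropy, -log2(0.375)
    >>> hmin = renyientropy(px, alpha=np.inf)  # min-entropy, -log2(0.5) = 1 bit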
    """
    # NOTE: the 'measure' keyword ('R' vs 'T') is documented but, as elsewhere
    # in this sandbox module, only the Renyi form is computed here.
    if not _isproperdist(px):
        raise ValueError("px is not a proper probability distribution")
    alpha = float(alpha)
    if alpha == 1:
        # limit alpha -> 1: Shannon entropy
        genent = shannonentropy(px)
        if logbase != 2:
            return logbasechange(2, logbase) * genent
        return genent
    elif 'inf' in str(alpha).lower() or alpha == np.inf:
        # limit alpha -> inf: min-entropy, -log(max(px)) in the requested base
        genent = -np.log2(np.max(px))
        if logbase != 2:
            return logbasechange(2, logbase) * genent
        return genent

    # general case, alpha not equal to 1 or inf; computed in bits and
    # converted so the base handling matches the alpha == 1 branch
    px = px ** alpha
    genent = 1 / (1 - alpha) * np.log2(px.sum())
    if logbase != 2:
        return logbasechange(2, logbase) * genent
    return genent


def gencrossentropy(px, py, pxpy, alpha=1, logbase=2, measure='T'):
    """
    Generalized cross-entropy measures.

    Parameters
    ----------
    px : array_like
        Discrete probability distribution of random variable X
    py : array_like
        Discrete probability distribution of random variable Y
    pxpy : 2d array_like, optional
        Joint probability distribution of X and Y.  If pxpy is None, X and Y
        are assumed to be independent.
    logbase : int or np.e, optional
        Default is 2 (bits)
    measure : str, optional
        The measure is the type of generalized cross-entropy desired. 'T' is
        the cross-entropy version of the Tsallis measure.  'CR' is Cressie-Read
        measure.
    """
    # TODO: not implemented in this sandbox copy; the docstring above
    # documents the intended interface only.


if __name__ == "__main__":
    print("From Golan (2008) \"Information and Entropy Econometrics "
          "-- A Review and Synthesis")
    print("Table 3.1")
    # discrete probability distributions from Golan (2008) Table 3.1
    X = [.2, .2, .2, .2, .2]
    Y = [.322, .072, .511, .091, .004]

    for i in X:
        print(shannoninfo(i))
    for i in Y:
        print(shannoninfo(i))
    print(shannonentropy(X))
    print(shannonentropy(Y))

    p = [1e-5, 1e-4, .001, .01, .1, .15, .2, .25, .3, .35, .4, .45, .5]

    plt.subplot(111)
    plt.ylabel("Information")
    plt.xlabel("Probability")
    x = np.linspace(0, 1, 1001)
    plt.plot(x, shannoninfo(x))
    # plt.show()

    plt.subplot(111)
    plt.ylabel("Entropy")
    plt.xlabel("Probability")
    x = np.linspace(0, 1, 101)
    plt.plot(x, lmap(shannonentropy, lzip(x, 1 - x)))
    # plt.show()

    # joint distribution from Golan (2008) Table 3.3
    w = np.array([[0, 0, 1 / 3.],
                  [1 / 9., 1 / 9., 1 / 9.],
                  [1 / 18., 1 / 9., 1 / 6.]])
    px = w.sum(0)
    py = w.sum(1)
    H_X = shannonentropy(px)
    H_Y = shannonentropy(py)
    H_XY = shannonentropy(w)
    H_XgivenY = condentropy(px, py, w)
    H_YgivenX = condentropy(py, px, w)
    # KL divergences converted from nats to bits; note D(p||q) != D(q||p)
    D_YX = logbasechange(np.e, 2) * stats.entropy(px, py)
    D_XY = logbasechange(np.e, 2) * stats.entropy(py, px)
    I_XY = mutualinfo(px, py, w)
    print("Table 3.3")
    print(H_X, H_Y, H_XY, H_XgivenY, H_YgivenX, D_YX, D_XY, I_XY)

    print("discretize functions")
    # NOTE: the longer example data vector used in the original script is not
    # fully recoverable from this copy; the short stand-in sample below only
    # keeps the call runnable.
    X = np.array([21.2, 44.5, 31.0, 19.5, 40.3, 29.3, 11.1, 15.5,
                  31.7, 25.1, 27.1, 14.1])
    discX = discretize(X)

    print()
    print("Example in section 3.6 of Golan, using table 3.3")
    print("Bounding errors using Fano's inequality")
    print("H(P_{e}) + P_{e}log(K-1) >= H(X|Y)")
    print("or, a weaker inequality")
    print("P_{e} >= [H(X|Y) - 1]/log(K)")
    print("P(x) = %s" % px)
    print("X = 3 has the highest probability, so this is the estimate Xhat")
    pe = 1 - px[2]
    print("The probability of error Pe is 1 - p(X=3) = %0.4g" % pe)
    H_pe = shannonentropy([pe, 1 - pe])
    print("H(Pe) = %0.4g and K=3" % H_pe)
    print("H(Pe) + Pe*log(K-1) = %0.4g >= H(X|Y) = %0.4g"
          % (H_pe + pe * np.log2(2), H_XgivenY))
    print("or using the weaker inequality")
    print(f"Pe = {pe:0.4g} >= [H(X) - 1]/log(K) = {(H_X - 1) / np.log2(3):0.4g}")
    print("Consider now, table 3.5, where there is additional information")
    print("The conditional probabilities of P(X|Y=y) are ")
    w2 = np.array([[0., 0., 1.],
                   [1 / 3., 1 / 3., 1 / 3.],
                   [1 / 6., 1 / 3., 1 / 2.]])
    print(w2)
    print("The probability of error given this information is")
    # NOTE: the exact expression used in the original script could not be
    # recovered from this copy; the average entropy of the last two
    # conditional rows stands in for H(X|Y) here.
    print("Pe = [H(X|Y) -1]/log(K) = %0.4g"
          % ((np.mean([shannonentropy(w2[1]), shannonentropy(w2[2])]) - 1)
             / np.log2(3)))
    print("such that more information lowers the error")

    # Stochastic processes
    # NOTE: transition matrix reproduced from the original example; the values
    # could not be verified against this copy, so treat them as illustrative.
    markovchain = np.array([[.553, .284, .163],
                            [.465, .312, .223],
                            [.420, .322, .258]])