
    M/Phu[                     
   d Z ddlZddlmZmZmZ i ZdqdZdqdZ	dqdZ
drdZdrdZdsd	Zdqd
ZdtdZ	 d Zd Zd ZdqdZedk    	r ed            ed           dZddgZddgZ eej                            ee                     ej                            ee          Z e ej        ej                            e ej        ddd          dddf                   eddd         z                        e ej        d d                     ej        Z eeddgej                            edd           d!          Z e ej        ed d"g                     ej                            edd           Z  e ej        d# d d"g                     ej        !                    edd"d$%          Z" ej#        e"$                                e"%                                g          Z  ej        d& d dg          Z& ee&            ed'eej                            ee&          z              ed(            ed)           d* ej'        j(        _)         ej#        g d+          Zej*        !                    dd,d-%          Z+ ed.dd,dd/            eej*        ,                    e+                     d0 ed1z  D             Z- ej#        e+$                                e+%                                g          Z. ej/        d2 g d3d-4          Z0 ed5e0            ed6            ed7           d$Z1ej2        Z ej#        ddg          Zd8Z3e!                    d9dd/e1%          Z4d: ed1z  D             Z5 ej        e3 Z6 ej#        e4$                                e4%                                g          Z. ej        d; g d<          Z7 ed=e7            ej/        d> g d<          Z8 ed?e8            ee,                    e4                      e ej        d@ d d"g                      e ej/        dA d d"g                      ee,                    e4                      e ej        dB d d"g                      ej#        g d+          ZdC ed1z  D             Z5 ej/        dD g d<          Z9 edEe9            eee4g d<ddF          Z: edGe:           ej2                             ej        dd/dH          d9          Z; ej<        e4e;I          \  Z=Z; edJ            eej2        e=e;g dK          Z> eej2        e=e;g dK          Z? eej2        e=e;g dKdLM          Z@ ee3            ej<        e4dNI          \  ZAZB	  eej2        eAeBg dK          ZC eej2        eAeBg dK          ZD eej2        eAeBg dKdLM          ZEej2        ,                    e4          ZF ejG        dOP            edQe1            edRe3            edSeF           e  edTeC            edUeD            edVeE           e  edWe>            edXe?            edYe@           e  edZe9            ed[e7            ed\e8           	  ed]            ed^            ejH        d          ZIdZI eeI           ejJ        !                    eId1dd_`          ZK eeKL                                            eejJ        ,                    eKd eKL                                d/z
  d/a                      ejM        eK          ZNd eKL                                d/z
  d/gZOeNejJ        fZP e ej/        eeOePb                      edc            edd           ejQ        !                    dde%          ZR ejM        eR          ZSeSejT        fZUd eRL                                d9z
  d/gZV e ej/        eeVeUb                      eejT        ,                    eRddfda                      edgejQ        ,                    eR                      eejQ        eR          ZW edheW           eSejQ        fZX eYdij          ZZ eeeWeXeZ          \  Z[Z\ eej]        ^                    e[          d                    dk Z_ e e_eW                      ej<        eRdNI          \  Z`Za	  eejQ        e`eaeV          Zb eejQ        e`eaeV          Zc edleb            edmec            eejQ        eReVddF          Zd edned            e eejQ        eReV ej        ddod          dF                      ej<        eR ejQ        d                               ej        ddod                    I          \  Z`Za edp            e eejQ        e`eaeV                     dS dS )ua(  estimate distribution parameters by various methods
method of moments or matching quantiles, and Maximum Likelihood estimation
based on binned data and Maximum Product-of-Spacings

Warning: I'm still finding cut-and-paste and refactoring errors, e.g.
    hardcoded variables from outer scope in functions
    some results do not seem to make sense for Pareto case,
    looks better now after correcting some name errors

initially loosely based on a paper and blog for quantile matching
  by John D. Cook
  formula for gamma quantile (ppf) matching by him (from paper)
  http://www.codeproject.com/KB/recipes/ParameterPercentile.aspx
  http://www.johndcook.com/blog/2010/01/31/parameters-from-percentiles/
  this is what I actually used (in parts):
  http://www.bepress.com/mdandersonbiostat/paper55/

quantile based estimator
^^^^^^^^^^^^^^^^^^^^^^^^
only special cases for number or parameters so far
Is there a literature for GMM estimation of distribution parameters? check
    found one: Wu/Perloff 2007


binned estimator
^^^^^^^^^^^^^^^^
* I added this also
* use it for chisquare tests with estimation distribution parameters
* move this to distribution_extras (next to gof tests powerdiscrepancy and
  continuous) or add to distribution_patch


example: t-distribution
* works with quantiles if they contain tail quantiles
* results with momentcondquant do not look as good as mle estimate

TODOs
* rearange and make sure I do not use module globals (as I did initially) DONE
  make two version exactly identified method of moments with fsolve
  and GMM (?) version with fmin
  and maybe the special cases of JD Cook
  update: maybe exact (MM) version is not so interesting compared to GMM
* add semifrozen version of moment and quantile based estimators,
  e.g. for beta (both loc and scale fixed), or gamma (loc fixed)
* add beta example to the semifrozen MLE, fitfr, code
  -> added method of moment estimator to _fitstart for beta
* start a list of how well different estimators, especially current mle work
  for the different distributions
* need general GMM code (with optimal weights ?), looks like a good example
  for it
* get example for binned data estimation, mailing list a while ago
* any idea when these are better than mle ?
* check language: I use quantile to mean the value of the random variable, not
  quantile between 0 and 1.
* for GMM: move moment conditions to separate function, so that they can be
  used for further analysis, e.g. covariance matrix of parameter estimates
* question: Are GMM properties different for matching quantiles with cdf or
  ppf? Estimate should be the same, but derivatives of moment conditions
  differ.
* add maximum spacings estimator, Wikipedia, Per Brodtkorb -> basic version Done
* add parameter estimation based on empirical characteristic function
  (Carrasco/Florens), especially for stable distribution
* provide a model class based on estimating all distributions, and collect
  all distribution specific information


References
----------

Ximing Wu, Jeffrey M. Perloff, GMM estimation of a maximum entropy
distribution with interval data, Journal of Econometrics, Volume 138,
Issue 2, 'Information and Entropy Econometrics' - A Volume in Honor of
Arnold Zellner, June 2007, Pages 532-546, ISSN 0304-4076,
DOI: 10.1016/j.jeconom.2006.05.008.
http://www.sciencedirect.com/science/article/B6VC0-4K606TK-4/2/78bc07c6245546374490f777a6bdbbcc
http://escholarship.org/uc/item/7jf5w1ht  (working paper)

Johnson, Kotz, Balakrishnan: Volume 2


Author : josef-pktd
License : BSD
created : 2010-04-20

changes:
added Maximum Product-of-Spacings 2010-05-12

    N)statsoptimizespecialc                       fd}|S )a  estimate distribution parameters based method of moments (mean,
    variance) for distributions with 1 shape parameter and fixed loc=0.

    Returns
    -------
    cond : function

    Notes
    -----
    first test version, quantile argument not used

    c                 j    | \  }}                     |d|          }t          j                  |z
  S )N        r   nparray)paramsalphascalemom2sdistfnmom2s       l/var/www/html/test/jupyter/venv/lib/python3.11/site-packages/statsmodels/sandbox/distributions/estimators.pycondzgammamomentcond.<locals>.condn   s4    uUBu--x~~e##     )r   r   r   quantiler   s   ` `  r   gammamomentcondr   a   s)    $ $ $ $ $ $
 Kr   c                 h    |\  }}|                      |d|          }t          j        |          |z
  S )a  estimate distribution parameters based method of moments (mean,
    variance) for distributions with 1 shape parameter and fixed loc=0.

    Returns
    -------
    difference : ndarray
        difference between theoretical and empirical moments

    Notes
    -----
    first test version, quantile argument not used

    The only difference to previous function is return type.

    r   r	   )r   r   r   r   r   r   r   s          r   gammamomentcond2r   u   s5      LE5LL5))E8D>>%r   c                     |\  }}}t          j        |                     |||                    |z
  }|>|\  }}	|                     |	|||          |z
  }
t          j        ||
dd         g          S |S )a+  moment conditions for estimating distribution parameters using method
    of moments, uses mean, variance and one quantile for distributions
    with 1 shape parameter.

    Returns
    -------
    difference : ndarray
        difference between theoretical and empirical moments and quantiles

    N   )r
   r   r   cdfconcatenater   r   r   r   shapelocr   mom2diffpqxqcdfdiffs              r   momentcondunboundr%      s     E3xUC6677$>HB**RU33b8~x!5666Or   c                     |\  }}t          j        |                     |||                    |z
  }|"|\  }}	|                     |	|||          |z
  }
|
S |S )a  moment conditions for estimating loc and scale of a distribution
    with method of moments using either 2 quantiles or 2 moments (not both).

    Returns
    -------
    difference : ndarray
        difference between theoretical and empirical moments or quantiles

    )r
   r   r   r   r   s              r   momentcondunboundlsr'      sf     JCxUC77884?HB**RU33b8Or   c                     t          |          dk    r|\  }}nt          |          dk    r|\  }}}n	 |\  }} | j        |g|R  |z
  }	|	S )ad  moment conditions for estimating distribution parameters by matching
    quantiles, defines as many moment conditions as quantiles.

    Returns
    -------
    difference : ndarray
        difference between theoretical and empirical quantiles

    Notes
    -----
    This can be used for method of moments or for generalized method of
    moments.

          )lenr   )
r   r   r   r   r   r    r   r"   r#   r$   s
             r   momentcondquantr,      sr      6{{a
UU	V		"sEE 	 FBfj%f%%%*GNr   c                     t          j        g d          |6t           d          r                               }ndg j        z  ddgz   }fddz  D             d t          j         fd|          }|S )	N{Gz?g?皙?g?g333333??gffffff?Gz?	_fitstartr   r         ?c                 :    g | ]}t          j        |          S r   )r   scoreatpercentile).0pxs     r   
<listcomp>z#fitquantilesgmm.<locals>.<listcomp>   s&    
=
=
=Q5"1a((
=
=
=r   d   c           	      Z    t          j        t          | fd           dz            S Nr   r)   )r
   sumr,   )r   r   r   pquantxqss    r   <lambda>z!fitquantilesgmm.<locals>.<lambda>   s4    vcl$GGGJ*L *L r   )r
   r   hasattrr3   numargsr   fmin)r   r9   startr@   frozenparestr   rA   s   `` `  @@r   fitquantilesgmmrI      s    ~@@@AA}6;'' 	1$$Q''EEC&"R0E
=
=
=
=&*
=
=
=CE] L L L L L L LMRT TFMr   c                      |t           t          j                  }t          j        |dz              fd}t          j        ||          S )aM  estimate parameters of distribution function for binned data using MLE

    Parameters
    ----------
    distfn : distribution instance
        needs to have cdf method, as in scipy.stats
    freq : ndarray, 1d
        frequency count, e.g. obtained by histogram
    binedges : ndarray, 1d
        binedges including lower and upper bound
    start : tuple or array_like ?
        starting values, needs to have correct length

    Returns
    -------
    paramest : ndarray
        estimated parameters

    Notes
    -----
    todo: add fixed parameter option

    added factorial

    Nr   c                     t          j         j        g| R            }t          j        t          j        |          z  t          j        dz             z
            z    S )[negative loglikelihood function of binned data

        corresponds to multinomial
        r   )r
   diffr   r?   logr   gammaln)r   probbinedgesr   freq
lnnobsfacts     r   nloglikezfitbinned.<locals>.nloglike  s`    
 wzvz(4V44455bfT"&,,%6Q8O8O%OPPPQQr   )NotImplementedErrorr
   r?   r   rO   r   rE   )r   rR   rQ   rF   fixednobsrT   rS   s   ```    @r   	fitbinnedrX      sx    4 !!6$<<Da((JR R R R R R R R =5)))r   Tc                    	 |t           t          j        |          }|r|t          |          z  	n!t          j        t          |                    	|t          |          z   	fd}t          j        ||          S )a  estimate parameters of distribution function for binned data using GMM

    Parameters
    ----------
    distfn : distribution instance
        needs to have cdf method, as in scipy.stats
    freq : ndarray, 1d
        frequency count, e.g. obtained by histogram
    binedges : ndarray, 1d
        binedges including lower and upper bound
    start : tuple or array_like ?
        starting values, needs to have correct length
    fixed : None
        not used yet
    weightsoptimal : bool
        If true, then the optimal weighting matrix for GMM is used. If false,
        then the identity matrix is used

    Returns
    -------
    paramest : ndarray
        estimated parameters

    Notes
    -----
    todo: add fixed parameter option

    added factorial

    Nc                 ~    t          j         j        g| R            }|z
  }t          j        |z  |          S )rL   )r
   rM   r   dot)r   rP   momcondrQ   r   
freqnormedweightss      r   gmmobjectivez"fitbinnedgmm.<locals>.gmmobjective?  sH    
 wzvz(4V44455t#vggow///r   )rU   r
   r?   floatonesr+   r   rE   )
r   rR   rQ   rF   rV   weightsoptimalrW   r_   r]   r^   s
   ` `     @@r   fitbinnedgmmrc     s    > !!6$<<D %uT{{"'#d))$$eDkk!J0 0 0 0 0 0 0 0 =u---r   c                 j     dd l }d|v s	d|v sd|d<    fd} |j        |fi |} ||          |fS )Nr   stepMaxstepFixgh㈵>c                      | gR  S Nr   )r   argsfuns    r   rB   zhess_ndt.<locals>.<lambda>X  s    ss6)D))) r   )numdifftoolsHessian)rj   parsri   optionsndtfhs   ` `    r   hess_ndtrr   T  sk      I$8$8!	)))))AA!!!!A1T77A:r   c                     t           j        d |j        |g| R  df         }t          j        |          }t          j        |                                           S )a  calculate negative log of Product-of-Spacings

    Parameters
    ----------
    params : array_like, tuple ?
        parameters of the distribution funciton
    xsorted : array_like
        data that is already sorted
    dist : instance of a distribution class
        only cdf method is used

    Returns
    -------
    mps : float
        negative log of Product-of-Spacings


    Notes
    -----
    MPS definiton from JKB page 233
    r   r4   )r
   r_r   rM   rN   mean)r   xsorteddistxcdfDs        r   logmpsrz   \  sS    , 5XTXg////34D
AF1IINNr   c                 R   t          | d          r|                     |          }nt          j        | j                  r4t          j        dg| j        z  |                                dz
  df         }n3t          j        dg| j        z  |                                dz
  df         }|S )a  get starting values for estimation of distribution parameters

    Parameters
    ----------
    dist : distribution instance
        the distribution instance needs to have either a method fitstart
        or an attribute numargs
    data : ndarray
        data for which preliminary estimator or starting value for
        parameter estimation is desired

    Returns
    -------
    x0 : ndarray
        preliminary estimate or starting value for the parameters of
        the distribution given the data, including loc and scale

    fitstartr4   r   )	rC   r|   r
   isfiniteart   rD   minru   )rw   datax0s      r   getstartparamsr   v  s    & tZ   ?]]4  ;tv 	?tDL(488::a<"<=BBtDL(499;;q=2=>BIr   c                     t          j        |          }|t          | |          }|| f}t          |           t	          j        t          ||          S )a  Estimate distribution parameters with Maximum Product-of-Spacings

    Parameters
    ----------
    params : array_like, tuple ?
        parameters of the distribution funciton
    xsorted : array_like
        data that is already sorted
    dist : instance of a distribution class
        only cdf method is used

    Returns
    -------
    x : ndarray
        estimates for the parameters of the distribution given the data,
        including loc and scale


    Nri   )r
   sortr   printr   rE   rz   )rw   r   r   rv   ri   s        r   fit_mpsr     sP    ( gdmmG	zD'**T?D	"III=$////r   __main__z

Example: gamma Distributionz---------------------------r)   g      ?   r0   r1   r/   
   c                     t          j        t          j                            t
          |           t          d d d         z            S )Nr   )r
   rM   r   gammappfr"   r#   )r   s    r   rB   rB     s0    E0J0J2ddPRd80S(T(T r   g      @g      @r   r4   )r   r          @c                 8    t          t          | t                    S rh   r   r   r   r   s    r   rB   rB     s    (8(N(N r   i  )sizec                 8    t          t          | t                    S rh   r   r   s    r   rB   rB     s    .>vvt.T.T r   zscale = z

Example: beta Distributionz--------------------------c                     dS )N)   r   r   r   r   )selfr   s     r   rB   rB     s     r   r.      i  ztrue paramsr   c                 B    g | ]}t          j        t          |          S r   )r   r6   rvsbr7   r8   s     r   r:   r:     s%    ===E#D!,,===r   r;   c           	          t          j        t          t          j        | t
          t          t          fd           dz            S r=   )r
   r?   r,   r   betar   r"   xqsbr   s    r   rB   rB     sQ    PUPZ\bdikmnrjs{  AA  AA  AA  CD  AD  :E  :E r   )r   r   r   r4   )maxiterbetaparest_gmmquantilez

Example: t Distributionz-----------------------)r   r   r   r   c                 B    g | ]}t          j        t          |          S r   r   r6   trvsr   s     r   r:   r:     %    
<
<
<5"4++
<
<
<r   c                 R    t          t          | t          t          t          f          S rh   )r%   r   r   r"   rA   r   s    r   rB   rB     s    ?PQWX^`egijmfn?o?o r   )r   r4   r   tparest_gmm3quantilefsolvec           	      |    t          j        t          t          | t          t
          t          f          dz            S )Nr)   )r
   r?   r%   r   r   r"   rA   r   s    r   rB   rB     s/    rv>OPVW]_dfhilem>n>npq>q7r7r r   tparest_gmm3quantilec                 <    t          t          | t          d          S Nr   r>   )r'   r   r   r   s    r   rB   rB     s    (;FFEXY(Z(Z(Z r   c                 f    t          j        t          t          | t          d          dz            S )Nr   r>   r)   )r
   r?   r'   r   r   r   s    r   rB   rB   	  s*    bf-@QV]^-_-_-_ab-b&c&c r   c                 V    t          t          | t          t          t          fd          S r   )r'   r   r   r"   rA   r   s    r   rB   rB     s"    (;FFESUVYRZab(c(c(c r   c                 B    g | ]}t          j        t          |          S r   r   r   s     r   r:   r:     r   r   c           	          t          j        t          t          | t          t
          t          fd           dz            S r=   )r
   r?   r,   r   r   r"   rA   r   s    r   rB   rB     s3    bf_VU[]bdfgjcksw=x=x=xz{={6|6| r   tparest_gmmquantile)rF   r@   rG   tparest_gmmquantile2   )binszfitbinned t-distribution)r   r   r   F)rb   2      )	precisionzsample sizeztrue (df, loc, scale)      zparest_mle                 ztparest_mlebinel           ztparest_gmmbinelidentity   ztparest_gmmbineloptimal    ztparest_mlebinew           ztparest_gmmbinewidentity   ztparest_gmmbinewoptimal    tparest_gmmquantileidentityztparest_gmm3quantilefsolve ztparest_gmm3quantile       z!

Example: Lognormal Distributionz-------------------------------   )r    r   r   )r    r   r   z:

Example: Lomax, Pareto, Generalized Pareto Distributionsz8--------------------------------------------------------i  izgpdparest_ mlezgpdparest_ mpsgHz>)rf   c                 &    t          | gt          R  S rh   )rz   argsgpdr   s    r   rB   rB     s    vf/w/// r   gpdparest_mlebinelgpdparest_gmmbinelidentitygpdparest_gmmquantile2r2   zfitbinnedgmm equal weight binsrh   )NN)NNN)NT)e__doc__numpyr
   scipyr   r   r   cacher   r   r%   r'   r,   rI   rX   rc   rr   rz   r   r   __name__r   r   r#   r"   r   r   rM   linspacefsolver   mcondr   rvsgrvsr   ru   var	alphaestqdistributionsbeta_genr3   r   r   fitr   r   rE   r   rW   t	paramsdgpr   rA   mom2thr   r   r   r   bt	histogramfttparest_mlebinewtparest_gmmbinewidentitytparest_gmmbinewoptimalft2bt2tparest_mlebineltparest_gmmbinelidentitytparest_gmmbineloptimaltparest_mleset_printoptionsexpshlognormr9   r   r   rv   r   ri   	genparetop2rvsp2rvssortedparetoargspx0pparsgpdr   dictrn   herq   linalgeighrp   fp2bp2r   r   r   r   r   r   <module>r      sY  W Wr     * * * * * * * * * *
   (       .   ,   2   B   "&* &* &* &*R1. 1. 1. 1.h    4  :0 0 0 0< z
 
E
+,,,	E+,,,E
qB
sB	E%+//"e
$
$%%%	U	#	#B	E'"'%+//"kbk$q&<&<QQQtV&DEEb2hN
O
OPPP	E/(/TTVX
Y
YZZZ[FOFRG%+2C2CE2b2Q2Q\`aaaE	E/(/%"R
)
)***;UBr**D	E/(/NNQSTVPW
X
XYYY;??5"Rd?33D28TYY[[$((**-..D T TWYZ\V]^^I	E)	E*bY777888 
E
*+++	E*+++ .K-JE *	888	9	9B:>>"RT>**D	E-RA&&&	E%*..

==bf===DBHdiikk488::.//E*X]  ,E  ,E+:??DJ J J	E
"%;<<< 
E
'(((	E'(((DWF	3s)		BI::aAD:))D
<
<RV
<
<
<CV\9%FBHdiikk488::.//E!01o1oq{q{q{!|!|	E
&(BCCC(8=)r)rt~t~t~	E
 "6777	E&**T

 
E/(/ZZ]_`b\c
d
deee	E-(-ccfhikel
m
mnnn	E&**T

	E/(/ccfhikel
m
mnnn 
888	9	9B
<
<RV
<
<
<C'(-(|(|  J  J  J  K  K	E
!4555*?64{{{SW`deee	E
 "6777 
[R[1R((	+	+BBL2&&&EBr	E
$%%% y"b***==+|EGRZZZHH*l57BJJJW\]]]	E) bl4R(((GC y#sJJJ??+|EGS#zzzJJ*l57CjjjY^___'++d##KB!$$$$	E-	E
'333	E
'555	E	E
')9:::	E
')ABBB	E
')@AAA	E	E
')9:::	E
')ABBB	E
')@AAA	E	E
')<===	E
')CDDD	E
')=>>>@ 
E
/000	E/000	B	B	E"III"BC88A	E!%%''NNN	E%-

Aquuwwqyq

9
9:::bgajjG
aeeggai	BU]#D	E-(-rt
,
,
,--- 
E
HIII	EHIIIO,,E"'%..K%,'Euyy{{1}a
 C	E-(-s
.
.
.///	E%,

5#3c

:
:;;;	E
EO//66777geou--G	E
G$$$EO,Gd4   G HVWgw77EB	E")..

Q
   //A	E!!G**r|E+++HC"5?CcBB!-eosC!M!M	E
 2333	E
&(BCCC,_c$tE E E	E
"$:;;;	E//%/5!,T$r!:!:4I I I J J Jr|U_Q##KBK$r$:$:;;= = =HC 
E
*+++	E,,uS#
6
677777i r   