
    M/Ph]              
          d Z ddlmZ ddlZddlmZmZmZ ej	         ej
        j        _        ej	        ej
        j        _        d Zd8dZd8dZd Zd	 Zd9dZd9dZ	 	 d:dZeej
        j        _        eej
        j        _        eej
        j        _        eej
        j        _        eej
        j        _        eej
        j        _        d;dZdi fdZd<dZedk    rg ddd         Zdev r ed           dD ]Z ej        !                    ddde           Z" ede             ed            ed            ed            eej        #                    e"                      eej                            e"ej$        ej$        ej$        g                       ed!            eej                            e"ej$        d"d#g                       ed$           ej%        Z&d%\  Z'Z(Z)dD ]Z e&!                    e'e(e)e           Z" ede             ed            ee'd&d'e(d&d(e)d&            ed            ee&#                    e"                      ee&                    e"ej$        ej$        ej$        g                       ed!            ee&                    e"ej$        d"d#g                       ed)            ee&                    e"ej$        d"ej$        g                      d*d+gd         Z*e*d*k    rej%        Z&d,\  Z'Z(Z)ne*d+k    rej        Z&d-\  Z'Z(Z)n e+d.          dZ d/Z,e&!                    e'e(e)e           Z- ed0e&           dev r& ed1            ee-e&e,2          Z. ee-e'e.           d3ev r9 ed4            ee-e&e, e/e'e(e)5          6          Z0 ee-e'e0d37           dS dS dS )=a  patching scipy to fit distributions and expect method

This adds new methods to estimate continuous distribution parameters with some
fixed/frozen parameters. It also contains functions that calculate the expected
value of a function for any continuous or discrete distribution

It temporarily also contains Bootstrap and Monte Carlo function for testing the
distribution fit, but these are neither general nor verified.

Author: josef-pktd
License: Simplified BSD
    )lmapN)statsoptimize	integratec                     t          j        |                                dg          }dt          j        |          dz  z  }t          j        |          t          j        |          z  }|||fS )a  example method, method of moment estimator as starting values

    Parameters
    ----------
    x : ndarray
        data for which the parameters are estimated

    Returns
    -------
    est : tuple
        preliminary estimates used as starting value for fitting, not
        necessarily a consistent estimator

    Notes
    -----
    This needs to be written and attached to each individual distribution

    This example was written for the gamma distribution, but not verified
    with literature

    r         )npminr   skewstdsqrt)selfxlocascales        i/var/www/html/test/jupyter/venv/lib/python3.11/site-packages/statsmodels/sandbox/distributions/sppatch.py	_fitstartr      s\    , &!%%''!

C	%*Q--
AF1II

"EsE?    c                    |                                 |                                }}||z
  dz  }|||z
  }||z
  dd|z  z   z  }nSt          j        |d                   r||z
  }n|d         }t          j        |d                   r	||z   |z
  }n|d         }t	          |          }||z
  |z  }|                                }	|                                }
|	d|	z
  z  |
z  dz
  }|	|z  }d|	z
  |z  }||||fS )a  method of moment estimator as starting values for beta distribution

    Parameters
    ----------
    x : ndarray
        data for which the parameters are estimated
    fixed : None or array_like
        sequence of numbers and np.nan to indicate fixed parameters and parameters
        to estimate

    Returns
    -------
    est : tuple
        preliminary estimates used as starting value for fitting, not
        necessarily a consistent estimator

    Notes
    -----
    This needs to be written and attached to each individual distribution

    References
    ----------
    for method of moment estimator for known loc and scale
    https://en.wikipedia.org/wiki/Beta_distribution#Parameter_estimation
    http://www.itl.nist.gov/div898/handbook/eda/section3/eda366h.htm
    NIST reference also includes reference to MLE in
    Johnson, Kotz, and Balakrishan, Volume II, pages 221-235

    g{Gz?N   r	   )r   maxr
   isnanfloatmeanvar)r   r   fixedr   bepsr   r   xtransxmxvtmppqs                 r   _fitstart_betar)   8   s   B 5577AEEGGqAQ3*C}#gQ1qu9%8E"I 	c'CC)C8E"I 	WOEE"IE %LLE#gu_F	B	Bqt9R<!C
SA	
R3Aq#ur   c                     |                                 }d}|||z
  }n(t          j        |d                   r||z
  }n|d         }||z
  }|                                }||fS )a  maximum likelihood estimator as starting values for Poisson distribution

    Parameters
    ----------
    x : ndarray
        data for which the parameters are estimated
    fixed : None or array_like
        sequence of numbers and np.nan to indicate fixed parameters and parameters
        to estimate

    Returns
    -------
    est : tuple
        preliminary estimates used as starting value for fitting, not
        necessarily a consistent estimator

    Notes
    -----
    This needs to be written and attached to each individual distribution

    References
    ----------
    MLE :
    https://en.wikipedia.org/wiki/Poisson_distribution#Maximum_likelihood

    r   Nr   )r   r
   r   r   )r   r   r    r   r"   r   r#   lambds           r   _fitstart_poissonr,   v   st    < 	
A
C}#g8E"I 	c'CC)C #gFKKMME 3<r   c                 2   	 |,|                                 }||t          j        |          <   n|}|d         }|d         }t          |d d                   }n# t          $ r t          d          w xY w | j        | r|dk    rt          j        S t          j        ||z
  |z            }|| j	        k    || j
        k    z  }t          j        |          rt          j        S t          |          }	 | j        |g|R  |	t          j        |          z  z   S )Nr   r   zNot enough input arguments.r   )copyr
   r   tuple
IndexError
ValueError	_argcheckinfarrayr   r!   anylen_nnlflog)
r   thetashr   frmaskthetar   r   argscond0Ns
             r   nnlf_frr?      s#   

8KKMME&-E"(6""##EBib	U3B3Z   8 8 8677784>4  EQJJv
!C%5!!A$&[Q$&[)E
u 6vFFtz!#d###auo55s   AA A4c                 |   t          |j        ddgddg          \  }}t          |          }|dk    r&t          | d          r|                     |          }n1|| j        k    rt          d          |d| j        |z
  z  z  }|||fz   }d	|v rt          j        |d	                   }t          |          | j        d
z   k    rt          d          t          t          |                    D ]P}	t          ||	         t          j                  r.||	         j        dk    r||	                                         ||	<   Q|                    t          j                  }t          j        |          t          j        |                   }nd}t#          j        | j        |t          j        |          |fd          S )a  estimate distribution parameters by MLE taking some parameters as fixed

    Parameters
    ----------
    data : ndarray, 1d
        data for which the distribution parameters are estimated,
    args : list ? check
        starting values for optimization
    kwds :

      - 'frozen' : array_like
           values for frozen distribution parameters and, for elements with
           np.nan, the corresponding parameter will be estimated

    Returns
    -------
    argest : ndarray
        estimated parameters


    Examples
    --------
    generate random sample
    >>> np.random.seed(12345)
    >>> x = stats.gamma.rvs(2.5, loc=0, scale=1.2, size=200)

    estimate all parameters
    >>> stats.gamma.fit(x)
    array([ 2.0243194 ,  0.20395655,  1.44411371])
    >>> stats.gamma.fit_fr(x, frozen=[np.nan, np.nan, np.nan])
    array([ 2.0243194 ,  0.20395655,  1.44411371])

    keep loc fixed, estimate shape and scale parameters
    >>> stats.gamma.fit_fr(x, frozen=[np.nan, 0.0, np.nan])
    array([ 2.45603985,  1.27333105])

    keep loc and scale fixed, estimate shape parameter
    >>> stats.gamma.fit_fr(x, frozen=[np.nan, 0.0, 1.0])
    array([ 3.00048828])
    >>> stats.gamma.fit_fr(x, frozen=[np.nan, 0.0, 1.2])
    array([ 2.57792969])

    estimate only scale parameter for fixed shape and loc
    >>> stats.gamma.fit_fr(x, frozen=[2.5, 0.0, np.nan])
    array([ 1.25087891])

    Notes
    -----
    self is an instance of a distribution class. This can be attached to
    scipy.stats.distributions.rv_continuous

    *Todo*

    * check if docstring is correct
    * more input checking, args is list ? might also apply to current fit method

    r   r                 ?r   r   zToo many input arguments.)rB   frozenr	   z%Incorrect number of frozen arguments.r   N)r<   disp)r   getr6   hasattrr   numargsr1   r
   r4   range
isinstancendarraysizeitemastypefloat64r   r   fminr?   ravel)
r   datar<   kwdsloc0scale0Nargx0r:   ns
             r   fit_frrX      s   t 5'"2C:>>LD&t99DqyyWT;//y^^D!!			4555T)**T6N"4$x.))v;;$,q.((DEEE 3v;;'' 1 1fQi44 119L9L &q	 0 0F1I ]]2:..F(2,,rx//0BB =rhtnnf-A7 7 7 7r    r   Fc                       fd}n fd}| j         z  z   }| j        z  z   }|r&  j        |g|R d  j        |g|R dz
  }	nd}	t          j        ||||          d         |	z  S )a  calculate expected value of a function with respect to the distribution

    location and scale only tested on a few examples

    Parameters
    ----------
        all parameters are keyword parameters
        fn : function (default: identity mapping)
           Function for which integral is calculated. Takes only one argument.
        args : tuple
           argument (parameters) of the distribution
        lb, ub : numbers
           lower and upper bound for integration, default is set to the support
           of the distribution
        conditional : bool (False)
           If true then the integral is corrected by the conditional probability
           of the integration interval. The return value is the expectation
           of the function, conditional on being in the given interval.

    Returns
    -------
        expected value : float

    Notes
    -----
    This function has not been checked for it's behavior when the integral is
    not finite. The integration behavior is inherited from scipy.integrate.quad.

    Nc                 .    |  j         | g|R dz  S Nr   r   pdfr   r<   r   r   r   s     r   funzexpect.<locals>.funI  s,    XTXa===S=====r   c                 @     |            j         | g|R dz  S r\   r^   r   r<   fnr   r   r   s     r   ra   zexpect.<locals>.funL  s4    2a55!ADAAEAAAAAr   r]   rB   )r<   r   )r   r!   sfr   quad
r   rd   r<   r   r   lbubconditionalra   invfacs
   `` ``     r   expectrl   *  s   < 
z	> 	> 	> 	> 	> 	> 	> 	>	B 	B 	B 	B 	B 	B 	B 	B	z46E>!	z46E>! $'":T::#U:::DGB<t<<Cu<<<= >#r2%)+ + ++,..45 5r   c                      fd}n fd}|'	   j         dg|R  }n5# t          $ r
  j        }Y n"w xY wt           j        |z
  dz  z            }|'	   j         dg|R  }n5# t          $ r
  j        }Y n"w xY wt           j        |z
  dz  z            }|r  j        |g|R    j        |g|R  z
  }	nd}	t          j        ||||d          d	         |	z  S )
a  calculate expected value of a function with respect to the distribution

    location and scale only tested on a few examples

    Parameters
    ----------
        all parameters are keyword parameters
        fn : function (default: identity mapping)
           Function for which integral is calculated. Takes only one argument.
        args : tuple
           argument (parameters) of the distribution
        lb, ub : numbers
           lower and upper bound for integration, default is set using
           quantiles of the distribution, see Notes
        conditional : bool (False)
           If true then the integral is corrected by the conditional probability
           of the integration interval. The return value is the expectation
           of the function, conditional on being in the given interval.

    Returns
    -------
        expected value : float

    Notes
    -----
    This function has not been checked for it's behavior when the integral is
    not finite. The integration behavior is inherited from scipy.integrate.quad.

    The default limits are lb = self.ppf(1e-9, *args), ub = self.ppf(1-1e-9, *args)

    For some heavy tailed distributions, 'alpha', 'cauchy', 'halfcauchy',
    'levy', 'levy_l', and for 'ncf', the default limits are not set correctly
    even  when the expectation of the function is finite. In this case, the
    integration limits, lb and ub, should be chosen by the user. For example,
    for the ncf distribution, ub=1000 works in the examples.

    There are also problems with numerical integration in some other cases,
    for example if the distribution is very concentrated and the default limits
    are too large.

    Nc                 2    | z  z    j         | g|R  z  S N_pdfr`   s     r   ra   zexpect_v2.<locals>.fun  s*    !E'M949Q#6#6#6#666r   c                 D     | z  z              j         | g|R  z  S ro   rp   rc   s     r   ra   zexpect_v2.<locals>.fun  s4    2cAeGm$$YTYq%84%8%8%888r   g&.>rB   gv?i  )r<   limitr   )	ppfr1   r   r   r!   r   _sfr   rf   rg   s
   `` ``     r   	expect_v2rv   [  s   ^ 
z	7 	7 	7 	7 	7 	7 	7 	7	9 	9 	9 	9 	9 	9 	9 	9	z	$&&&&BB 	 	 	BBB	 "s(SY/00	z	&(4(((BB 	 	 	BBB	 "s(SY/00 "#d###hdhr&84&8&8&88>#r2%)6 6 66799?@ @s   ) ==!A0 0BBc                     d}d} fd}	n fd}	  j           | j        }n|z
  }| j        }n|z
  }|r!  j        |gR    j        |dz   gR  z
  }
nd}
d}  j        d	gR    j        d
gR  }}t          t          | |          |          }t          t          ||          |          }t          j        ||dz    j	                  }t          j
         |	|                    }d}| j	        z   }d}||k    rG| j        k    r<||k    r6 |	|          }||z  }| j	        z  }|dz  }||k    r| j        k    r||k    6 j        dk     rYd}| j	        z
  }||k    rG| j        k    r<||k    r6 |	|          }||z  }| j	        z  }|dz  }||k    r| j        k    r||k    6||k    rt          d           ||
z  S )a  calculate expected value of a function with respect to the distribution
    for discrete distribution

    Parameters
    ----------
        (self : distribution instance as defined in scipy stats)
        fn : function (default: identity mapping)
           Function for which integral is calculated. Takes only one argument.
        args : tuple
           argument (parameters) of the distribution
        optional keyword parameters
        lb, ub : numbers
           lower and upper bound for integration, default is set to the support
           of the distribution, lb and ub are inclusive (ul<=k<=ub)
        conditional : bool (False)
           If true then the expectation is corrected by the conditional
           probability of the integration interval. The return value is the
           expectation of the function, conditional on being in the given
           interval (k such that ul<=k<=ub).

    Returns
    -------
        expected value : float

    Notes
    -----
    * function is not vectorized
    * accuracy: uses self.moment_tol as stopping criterium
        for heavy tailed distribution e.g. zipf(4), accuracy for
        mean, variance in example is only 1e-5,
        increasing precision (moment_tol) makes zipf very slow
    * suppnmin=100 internal parameter for minimum number of points to evaluate
        could be added as keyword parameter, to evaluate functions with
        non-monotonic shapes, points include integers in (-suppnmin, suppnmin)
    * uses maxcount=1000 limits the number of points that are evaluated
        to break loop for infinite sums
        (a maximum of suppnmin+1000 positive plus suppnmin+1000 negative integers
        are evaluated)


      d   Nc                 ,    | z    j         | gR  z  S ro   _pmf)r   r<   r   r   s    r   ra   zexpect_discrete.<locals>.fun  s&    cE949Q......r   c                 >     | z              j         | gR  z  S ro   r{   )r   r<   rd   r   r   s    r   ra   zexpect_discrete.<locals>.fun  s.    2ae99YTYq0400000r   r   rB   rA   gMbP?g+?g}Ô%ITr   zsum did not converge)r2   r   r!   re   _ppfr   r   r
   arangeincsum
moment_tolprint)r   rd   r<   r   rh   ri   rj   maxcountsuppnminra   rk   totlowuppsuppdiffposcounts   ````              r   expect_discreter     s   ` HH	z	/ 	/ 	/ 	/ 	/ 	/ 	/ 	/	1 	1 	1 	1 	1 	1 	1 	1 DND	zf#X	zf#X "T"""WTWRT%84%8%8%88
Cty&&&&		%(?$(?(?(?C
c8)S!!2
&
&C
c(C  "
%
%C9S#a%**D
&T

CD
.CE "994$/11u7H7Hs3xxttx
	 "994$/11u7H7H vzzDHnbyytdo555H;L;L3s88D4KC48OCQJE	 byytdo555H;L;L
 x$%%%v:r   ry   c                    t          |           }t          j        |          }t          |          D ]R}t          j                            ||          }| |         }|                    |t          j        ddg          ||<   S|S )a  run bootstrap for estimation of distribution parameters

    hard coded: only one shape parameter is allowed and estimated,
        loc=0 and scale=1 are fixed in the estimation

    Parameters
    ----------
    sample : ndarray
        original sample data for bootstrap
    distr : distribution instance with fit_fr method
    nrepl : int
        number of bootstrap replications

    Returns
    -------
    res : array (nrepl,)
        parameter estimates for all bootstrap replications

    )rK   rA   rB   rC   )r6   r
   zerosrH   randomrandintrX   nan)sampledistrnreplnobsresiirvsindr   s           r   distfitbootstrapr   %  s}    ( v;;D
(5//CEll = =""4d"336N,,q"&#s);,<<BJr   c                    |                     d          }t          |           }t          j        |          }t	          |          D ]9} |j        |fd|i|}|                    |t          j        ddg          ||<   :|S )a  run Monte Carlo for estimation of distribution parameters

    hard coded: only one shape parameter is allowed and estimated,
        loc=0 and scale=1 are fixed in the estimation

    Parameters
    ----------
    sample : ndarray
        original sample data, in Monte Carlo only used to get nobs,
    distr : distribution instance with fit_fr method
    nrepl : int
        number of Monte Carlo replications

    Returns
    -------
    res : array (nrepl,)
        parameter estimates for all Monte Carlo replications

    argrK   rA   rB   r   )popr6   r
   r   rH   rvsrX   r   )	r   r   r   distkwdsr   r   r   r   r   s	            r   	distfitmcr   A  s    ( ,,u

Cv;;D
(5//CEll = =EIc11111,,q"&#s);,<<BJr   	bootstrapc           	      ,   t          d           t          |           t          dt          z             t                              | t          j        ddg          }t          |           |dk    r|}|}t          d|t          fz             t          d|                    d	          d
d|                    d	          |z
  d
           t          dt	          j        |d	                     t          d|	                    d	          t	          j
        |	                    d	                               ||z
  dz                      d	          }t          d|t	          j
        |                     t	          j        |          }t          d|z             t          |t	          j        t          dz                     |t	          j        t          dz                                t          d|z             t          t          j                            d|                                |                                                     t          t          j                            d|                                |                                                     t          d|z             t          d           t          t          j        |d|                                |                                f                     dS )a  calculate and print(Bootstrap or Monte Carlo result

    Parameters
    ----------
    sample : ndarray
        original sample data
    arg : float   (for general case will be array)
    bres : ndarray
        parameter estimates from Bootstrap or Monte Carlo run
    kind : {'bootstrap', 'montecarlo'}
        output is printed for Mootstrap (default) or Monte Carlo

    Returns
    -------
    None, currently only printing

    Notes
    -----
    still a bit a mess because it is used for both Bootstrap and Monte Carlo

    made correction:
        reference point for bootstrap is estimated parameter

    not clear:
        I'm not doing any ddof adjustment in estimation of variance, do we
        need ddof>0 ?

    todo: return results and string instead of printing

    ztrue parameter valuez1MLE estimate of parameters using sample (nobs=%d)rA   rB   r   r   z0%s distribution of parameter estimate (nrepl=%d)zmean = r   fz, bias=median)axiszvar and stdr	   z	mse, rmsez&%s confidence interval (90%% coverage)g?gffffff?z;%s confidence interval (90%% coverage) normal approximationr]   z8Kolmogorov-Smirnov test for normality of %s distributionz4 - estimated parameters, p-values not really correctnormN)r   r   r   rX   r
   r   r   r   r   r   r   sortfloorr   r   rt   r   isfkstest)r   r   breskindargestargorigbmse
bressorteds           r   printresultsr   ^  ss   > 

 !!!	#JJJ	
=
EFFF\\&"&#s);\<<F	&MMM{	
<tUm
KLLL	
?DIIaLL
?
?
?499Q<<+;
?
?
?@@@	(BId+++,,,	-!bgdhhqkk&:&:;;;CZ!O!!!$$D	+tRWT]]+++J	
2T
9:::	*RXeDj))
*Jrxd
7K7K,LMMM	
G$
NOOO	%*..499;;dhhjj.
A
ACCC	%*..499;;dhhjj.
A
ABBB	
Dt
KLLL	
@AAA	%,tVdiikk488::%>
?
?@@@@@r   __main__)largenumberr   
montecarlor   z
Distribution: vonmises)   gGz?)r   r   rK   z
nobs:ztrue parameterz1.23, loc=0, scale=1unconstrainedr   zwith fixed loc and scalerA   rB   z
Distribution: gamma)      @rA   g      4@r   z, loc=z, scale=zwith fixed locgammavonmises)r   rA   r   )g      ?rA   r   zwrong examplerx   z
Distribution:z

Bootstrap)r   r   z
MonteCarlo)r   r   r   )r   r   )r   ro   )NrY   r   r   NNF)NrY   r   NNF)ry   )r   )1__doc__statsmodels.compat.pythonr   numpyr
   scipyr   r   r   pidistributionsr   r   r!   r   r)   r,   r?   rX   rl   rv   r   rv_continuousrv_discretebeta_genpoisson_genr   r   r   __name__examplecasesr   r   r   r   fitr   r   r   r   r   r   exr1   r   r   r   dictmcresrY   r   r   <module>r      s    + * * * * *     , , , , , , , , , , #%%   !#   
  6< < < <|1 1 1 1h6 6 66[7 [7 [7J.5 .5 .5 .5bJ@ J@ J@ J@d @D %l l l l\ ,2  ! (,3  ! )+1  ! ()8   &)7   &,=   )
   8 $'    :7A 7A 7A 7At z===aaa@L$$())) 		G 		GD""4Qad"CCAE)T"""E"###E()))E/"""E%.$$Q''(((E%.''2626262J'KKLLLE,---E%.''2632D'EEFFFF%&&&&S% 	A 	AD		#3e$	??AE)T"""E"###ES:::#::::::;;;E/"""E%))A,,E%,,q"&"&"&)A,BBCCCE,---E%,,q"&#s);,<<===E"###E%,,q"&#rv)>,??@@@@ :	q	!B	W}}$S%%	z		$S%%j)))DEYYs5tY<<F	E
U###l""mU<<<VS$'''|##n	&%u#'4CS#F#F#FH H HVS%l;;;;;; v $#r   