
    M/PhqS                        d Z ddlmZ ddlZddlmZ ddlmZ ddl	m
Z d Zd?dZd ZeZd Zd Zd Zd Zd Zi Zeeeeeeeded<   d Zd d d ded<   deied<    G d d          Zd@d ZdAd!ZdBd#ZdCd$Z G d% d&          Zed'k    r<dd(lmZ ej         !                    d)d"*          Z! e"d+            e" ee!d,                      ee!d,          Z# e"e#$                                           g d-Z%e%D ]"Z& e"e&e#$                    e&d                     # e"d.           dd/l'm(Z(  e(e)          Z*d"Z+ e,d          D ]hZ-ej.        /                    e+          Z! ee!d,          Z#e%D ]=Z&e*e&         0                    e#$                    e&d          d         d0                    >i ej1        d1 e%D                       Z2 e"d2d33                    e%                      e"d4e2d5k     4                    d0                      e"d6e2d7k     4                    d0                      e"d8e2d9k     4                    d0                      ed: d,d";           d"Z+dZ5 e e            d<e+e5d=          Z6 ej7        e5 ej1        g d>          z            8                    e9          Z: e"e6e:                    dS dS )Dat  More Goodness of fit tests

contains

GOF : 1 sample gof tests based on Stephens 1970, plus AD A^2
bootstrap : vectorized bootstrap p-values for gof test with fitted parameters


Created : 2011-05-21
Author : Josef Perktold

parts based on ks_2samp and kstest from scipy.stats
(license: Scipy BSD, but were completely rewritten by Josef Perktold)


References
----------

    )lmapN)distributions)cache_readonly)
kolmogorovc                    t          t          j        | |f          \  } }| j        d         }|j        d         }t	          |           }t	          |          }t          j        |           } t          j        |          }t          j        | |g          }t          j        | |d          d|z  z  }t          j        ||d          d|z  z  }t          j        t          j	        ||z
                      }t          j
        ||z  t          ||z             z            }	 t          |dz   d|z  z   |z            }	n	#  d}	Y nxY w||	fS )aA  
    Computes the Kolmogorov-Smirnof statistic on 2 samples.

    This is a two-sided test for the null hypothesis that 2 independent samples
    are drawn from the same continuous distribution.

    Parameters
    ----------
    a, b : sequence of 1-D ndarrays
        two arrays of sample observations assumed to be drawn from a continuous
        distribution, sample sizes can be different


    Returns
    -------
    D : float
        KS statistic
    p-value : float
        two-tailed p-value


    Notes
    -----

    This tests whether 2 samples are drawn from the same distribution. Note
    that, like in the case of the one-sample K-S test, the distribution is
    assumed to be continuous.

    This is the two-sided test, one-sided tests are not implemented.
    The test uses the two-sided asymptotic Kolmogorov-Smirnov distribution.

    If the K-S statistic is small or the p-value is high, then we cannot
    reject the hypothesis that the distributions of the two samples
    are the same.

    Examples
    --------

    >>> from scipy import stats
    >>> import numpy as np
    >>> from scipy.stats import ks_2samp

    >>> #fix random seed to get the same result
    >>> np.random.seed(12345678)

    >>> n1 = 200  # size of first sample
    >>> n2 = 300  # size of second sample

    different distribution
    we can reject the null hypothesis since the pvalue is below 1%

    >>> rvs1 = stats.norm.rvs(size=n1,loc=0.,scale=1)
    >>> rvs2 = stats.norm.rvs(size=n2,loc=0.5,scale=1.5)
    >>> ks_2samp(rvs1,rvs2)
    (0.20833333333333337, 4.6674975515806989e-005)

    slightly different distribution
    we cannot reject the null hypothesis at a 10% or lower alpha since
    the pvalue at 0.144 is higher than 10%

    >>> rvs3 = stats.norm.rvs(size=n2,loc=0.01,scale=1.0)
    >>> ks_2samp(rvs1,rvs3)
    (0.10333333333333333, 0.14498781825751686)

    identical distribution
    we cannot reject the null hypothesis since the pvalue is high, 41%

    >>> rvs4 = stats.norm.rvs(size=n2,loc=0.0,scale=1.0)
    >>> ks_2samp(rvs1,rvs4)
    (0.07999999999999996, 0.41126949729859719)
    r   right)side      ?Q?)\(?)r   npasarrayshapelensortconcatenatesearchsortedmaxabsolutesqrtfloatksprob)
data1data2n1n2data_allcdf1cdf2denprobs
             i/var/www/html/test/jupyter/venv/lib/python3.11/site-packages/statsmodels/sandbox/distributions/gof_new.pyks_2sampr$      s4   P 
UEN33LE5	QB	QB	UB	UBGENNEGENNE~uUm,,H?5w777R@DOE(8883r6BD
r{49%%&&A	BuRU||#	$	$Br$wtBw)**d7Ns   $E   E    	two_sidedapproxc                 L   t          | t                    rL|r|| k    r5t          t          |           j        }t          t          |           j        } nt          d          t          |t                    rt          t          |          j        }t          |           rd|i}t          j	         | |i |          }n#t          j	        |           }t          |          } ||g|R  }|dv rXt          j        d|dz             |z  |z
                                  }	|dk    r"|	t          j                            |	|          fS |dv rU|t          j        d|          |z  z
                                  }
|d	k    r"|
t          j                            |
|          fS |d
k    rt          j        |	|
g          }|dk    r6|t          j                            |t          j        |          z            fS |dk    rt          j                            |t          j        |          z            }|dk    s|d|dz  dz  z
  k    r6|t          j                            |t          j        |          z            fS |t          j                            ||          dz  fS dS dS )a  
    Perform the Kolmogorov-Smirnov test for goodness of fit

    This performs a test of the distribution G(x) of an observed
    random variable against a given distribution F(x). Under the null
    hypothesis the two distributions are identical, G(x)=F(x). The
    alternative hypothesis can be either 'two_sided' (default), 'less'
    or 'greater'. The KS test is only valid for continuous distributions.

    Parameters
    ----------
    rvs : str or array or callable
        string: name of a distribution in scipy.stats

        array: 1-D observations of random variables

        callable: function to generate random variables, requires keyword
        argument `size`

    cdf : str or callable
        string: name of a distribution in scipy.stats, if rvs is a string then
        cdf can evaluate to `False` or be the same as rvs
        callable: function to evaluate cdf

    args : tuple, sequence
        distribution parameters, used if rvs or cdf are strings
    N : int
        sample size if rvs is string or callable
    alternative : 'two_sided' (default), 'less' or 'greater'
        defines the alternative hypothesis (see explanation)

    mode : 'approx' (default) or 'asymp'
        defines the distribution used for calculating p-value

        'approx' : use approximation to exact distribution of test statistic

        'asymp' : use asymptotic distribution of test statistic


    Returns
    -------
    D : float
        KS test statistic, either D, D+ or D-
    p-value :  float
        one-tailed or two-tailed p-value

    Notes
    -----

    In the one-sided test, the alternative is that the empirical
    cumulative distribution function of the random variable is "less"
    or "greater" than the cumulative distribution function F(x) of the
    hypothesis, G(x)<=F(x), resp. G(x)>=F(x).

    Examples
    --------

    >>> from scipy import stats
    >>> import numpy as np
    >>> from scipy.stats import kstest

    >>> x = np.linspace(-15,15,9)
    >>> kstest(x,'norm')
    (0.44435602715924361, 0.038850142705171065)

    >>> np.random.seed(987654321) # set random seed to get the same result
    >>> kstest('norm','',N=100)
    (0.058352892479417884, 0.88531190944151261)

    is equivalent to this

    >>> np.random.seed(987654321)
    >>> kstest(stats.norm.rvs(size=100),'norm')
    (0.058352892479417884, 0.88531190944151261)

    Test against one-sided alternative hypothesis:

    >>> np.random.seed(987654321)

    Shift distribution to larger values, so that cdf_dgp(x)< norm.cdf(x):

    >>> x = stats.norm.rvs(loc=0.2, size=100)
    >>> kstest(x,'norm', alternative = 'less')
    (0.12464329735846891, 0.040989164077641749)

    Reject equal distribution against alternative hypothesis: less

    >>> kstest(x,'norm', alternative = 'greater')
    (0.0072115233216311081, 0.98531158590396395)

    Do not reject equal distribution against alternative hypothesis: greater

    >>> kstest(x,'norm', mode='asymp')
    (0.12464329735846891, 0.08944488871182088)


    Testing t distributed random variables against normal distribution:

    With 100 degrees of freedom the t distribution looks close to the normal
    distribution, and the kstest does not reject the hypothesis that the sample
    came from the normal distribution

    >>> np.random.seed(987654321)
    >>> stats.kstest(stats.t.rvs(100,size=100),'norm')
    (0.072018929165471257, 0.67630062862479168)

    With 3 degrees of freedom the t distribution looks sufficiently different
    from the normal distribution, that we can reject the hypothesis that the
    sample came from the normal distribution at a alpha=10% level

    >>> np.random.seed(987654321)
    >>> stats.kstest(stats.t.rvs(3,size=100),'norm')
    (0.131016895759829, 0.058826222555312224)
    5if rvs is string, cdf has to be the same distributionsize)r'   greaterr
      r,   )r'   less        r.   r'   asympr(   j
  皙?333333?     @@   N)
isinstancestrgetattrr   cdfrvsAttributeErrorcallabler   r   r   aranger   ksonesf	kstwobignr   )r:   r9   argsNalternativemodekwdsvalscdfvalsDplusDminDpval_twos                r#   kstestrL   }   s   f #s Z 	Z---1C---1CC !XYYY #s .mS))-}} qzwssD(4(())ws||IIc$G...3!$$Q&05577)##--00q9999+++")C++A--2244&  ,//Q7777k!!FE$<  7??m-00271::>>>>8$.11!BGAJJ,??H4xx8dQsU6\&999-144Qrwqzz\BBBB--00155a777 "!     c                     t          j        |          dz   dt          j        |          z  z   }| |z  }t          j        d|dz  z            }t          j        | t          j        g d          k              }|||fS )Nr   r   r5   )=
ףp=?rP   r
   r   r   expsumarraystatnobs
mod_factorstat_modifiedpvaldigitss         r#   dplus_st70_uppr\     s{    %rwt}}(<<J:%M6"}a''((DVD28$6$6$677788F$&&rM   c                    t          j        |          dz   dt          j        |          z  z   }| |z  }dt          j        d|dz  z            z  }t          j        | t          j        g d          k              }|||fS )Nr   r   r5   rO   )Q?r^   gHzG?rQ   rU   s         r#   
d_st70_uppr_   &  s    %rwt}}(<<J:%Mrvb=!++,,,DVD28$6$6$677788F$&&rM   c                    t          j        |          dz   dt          j        |          z  z   }| |z  }|dz  }d|z  dz
  t          j        d|z            z  }t          j        | t          j        g d          k              }|||fS )Ngףp=
?gQ?r5      rO   )(\?rb   g)\(?rQ   )rV   rW   rX   rY   zsqurZ   r[   s          r#   
v_st70_upprd   .  s    &)==J:%M!DHqLBF29---DVD28$6$6$677788F$&&rM   c                     d|z  }| d|z  z
  d|dz  z  z   d|z   z  }dt          j        dd|z  z
            z  }t           j        }|||fS )	Nr
   g?g333333?r5   r-   皙?gRQ@   )r   rR   nanrV   rW   nobsinvrY   rZ   r[   s         r#   wsqu_st70_upprk   7  sb    4iGC'M)C'1*,<<WMM"&M 11222DVF$&&rM   c                     d|z  }| d|z  z
  d|dz  z  z   }|dd|z  z   z  }dt          j        d|z  t           j        dz  z            z  }t          j        | t          j        g d          k              }|||fS )Nr
   皙?r5   r-   r2   rO   )(\?rn   g(\?r   rR   pirS   rT   ri   s         r#   usqu_st70_upprq   ?  s    4iGC'M)C'1*,<<Ma#-'(MrvcM)BE1H4555DVD28$6$6$677788F$&&rM   c                     d|z  }| d|z  z
  d|dz  z  z   }|dd|z  z   z  }dt          j        d|z  d	z  t           j        dz  z            z  }t          j        | t          j        g d
          k              }|||fS )Nr
   gffffff??r5   r-   gGz?g|?5^?rO          @)r   r   g!rh?ro   ri   s         r#   
a_st70_uppru   H  s    4iGC'M)C'1*,<<Ma$.()M26#-2RUAX=>>>DVD28$7$7$788899F$&&rM   )d_plusd_minusr    vwsquusquastephens70uppc                 v   t           j                            | t          j        |          z            }|dk    s|d|dz  dz  z
  k    rA| t           j                            | t          j        |          z            t          j        fS | t           j                            | |          dz  t          j        fS )Nr1   r2   r3   r4   r5   )r   r@   r?   r   r   rh   r>   )rJ   rB   rK   s      r#   pval_kstest_approxr~   ^  s    &))!BGAJJ,77H4xx8dQsU6\111-),,Qrwqzz\::BFBB-%((1--a/77rM   c                 \    | t           j                            | |          t          j        fS Nr   r>   r?   r   rh   )rH   rB   s     r#   <lambda>r   f  s"    (;(>(>ua(H(H"& Q rM   c                 \    | t           j                            | |          t          j        fS r   r   )rI   rB   s     r#   r   r   g  s"    }':'='=d1'E'Erv N rM   c                     | t           j                            | t          j        |          z            t          j        fS r   )r   r@   r?   r   r   rh   )rJ   rB   s     r#   r   r   h  s,    =255a

lCCRVL rM   )rv   rw   r    scipyr    scipy_approxc                       e Zd ZdZddZed             Zed             Zed             Zed             Z	ed	             Z
ed
             Zed             Zed             ZddZdS )GOFaP  One Sample Goodness of Fit tests

    includes Kolmogorov-Smirnov D, D+, D-, Kuiper V, Cramer-von Mises W^2, U^2 and
    Anderson-Darling A, A^2. The p-values for all tests except for A^2 are based on
    the approximatiom given in Stephens 1970. A^2 has currently no p-values. For
    the Kolmogorov-Smirnov test the tests as given in scipy.stats are also available
    as options.




    design: I might want to retest with different distributions, to calculate
    data summary statistics only once, or add separate class that holds
    summary statistics and data (sounds good).




    r%   r&   c                    t          |t                    rL|r||k    r5t          t          |          j        }t          t          |          j        }nt          d          t          |t                    rt          t          |          j        }t          |          rd|i}t          j	         ||i |          }n#t          j	        |          }t          |          } ||g|R  }|| _        || _        || _        d S )Nr*   r+   )r6   r7   r8   r   r9   r:   r;   r<   r   r   r   rW   vals_sortedrG   )selfr:   r9   rA   rB   rE   rF   rG   s           r#   __init__zGOF.__init__  s   c3 	^ ^SCZZmS115mS115$%\]]] c3 	2---1CC== 	1:D733,t,,--DD73<<DD		A#d"T"""	rM   c                 ~    | j         }| j        }t          j        d|dz             |z  |z
                                  S )Nr
   r-   rW   rG   r   r=   r   r   rW   rG   s      r#   rv   z
GOF.d_plus  s<    y,	#tAv&&t+g5::<<<rM   c                 x    | j         }| j        }|t          j        d|          |z  z
                                  S )Nr/   r   r   s      r#   rw   zGOF.d_minus  s8    y,")C..t3388:::rM   c                 B    t          j        | j        | j        g          S r   )r   r   rv   rw   r   s    r#   r    zGOF.d  s    vt{DL1222rM   c                      | j         | j        z   S )Kuiper)rv   rw   r   s    r#   rx   zGOF.v  s     {T\))rM   c                     | j         }| j        }|dt          j        d|dz             z  dz
  |z  dz  z
  dz                                  d|z  dz  z   }|S )zCramer von Misesrt   r
   r-   r5   g      (@)rW   rG   r   r=   rS   )r   rW   rG   ry   s       r#   ry   zGOF.wsqu  sf     y,B2tAv!6!66:D@CCaGLLNNDrM   c                 l    | j         }| j        }| j        ||                                dz
  dz  z  z
  }|S )N      ?r5   )rW   rG   ry   mean)r   rW   rG   rz   s       r#   rz   zGOF.usqu  s8    y,y47<<>>C#7!";;;rM   c                     | j         }| j        }d}t          d|          D ]@}||         |d |         z
  }|dk    }d||         z
  ||<   ||                                z  }A|dz  d|z  |z  z
  }|S )Nr   r-   r   g      @rt   )rW   rG   rangerS   )r   rW   rG   msumjmjmaskr{   s           r#   r{   zGOF.a  s    y, q 	 	Agbqbk)BHD2d8|BtHBFFHHDD2IT	D((rM   c           	          | j         }| j        }dt          j        d|dz             z  dz
  t          j        |          t          j        d|ddd         z
            z   z                                   |z  |z
  }|S )z4Stephens 1974, does not have p-value formula for A^2rt   r
   r-   N)rW   rG   r   r=   logrS   )r   rW   rG   asqus       r#   r   zGOF.asqu  s     y,ryT!V,,,q026!GDDbDM/#:#::=>AceeDDHIKOP rM   r    r|   c                     t          | |          }|dk    r#t          |         |         || j                  |fS t          |         |         || j                  S )z


        r|   )r8   	gof_pvalsrW   )r   testidpvalsrV   s       r#   get_testzGOF.get_test  s[    
 tV$$O##U#F+D$)<<dBBU#F+D$)<<<rM   N)r%   r&   )r    r|   )__name__
__module____qualname____doc__r   r   rv   rw   r    rx   ry   rz   r{   r   r   r%   rM   r#   r   r   n  s        .   4 = = ^=
 ; ; ^;
 3 3 ^3 * * ^*   ^   ^   ^   ^	= 	= 	= 	= 	= 	=rM   r   d   c                   	 ddl m}  |t                    	t          d          D ]b} | |          }t	          ||          }t
          D ]=}	|                             |                    |d          d         d                    >ct          j	        	fdt
          D                       }t          dd                    t
                               t          d	|d
k                         d                     t          d|dk                         d                     t          d|dk                         d                     d S )Nr   defaultdicti  r|   r-   c                      g | ]
}|         S r%   r%   ).0tiresultss     r#   
<listcomp>zgof_mc.<locals>.<listcomp>   s    666rwr{666rM   	               at 0.01:{Gz?at 0.05:rf   at 0.10:rm   )collectionsr   listr   r   all_gofsappendr   r   rT   printjoinr   )
randfndistrrW   r   ir:   goftr   resarrr   s
            @r#   gof_mcr     sM   ''''''k$G4[[ I IfTll3 	I 	IBBKt}}RAA!DQGHHHH	I X6666X66677F	+x}}X..///	*v}**1--...	*v}**1--...	*v|))!,,-----rM   c           	         t          | j                  }| j        |         }t          d          g|z  }dg|z  }t          d          ||<   t          ddd          ||<   dt          j        d|dz             t          |                   z  dz
  t          j        |           t          j        d| t          |                   z
            z   z  |z                      |           |z
  }|S )z.vectorized Anderson Darling A^2, Stephens 1974Nr   rt   r
   r-   )r   r   slicer   r=   tupler   rS   )rG   axisndimrW   slice_reverseislicer   s          r#   asquarer     s    w}D=D4[[MD(MVd]F;;F4LdB//M$29Ra((v77!;VG__rvam0D0D(E&EFFFHHLMNQcRViiXD KrM      c                 F   ||t          d          t          t          j        |t	          |          z                      }d}t          |          D ]} | j        |fi d||fi}	|                     |	d          }
t          d |
          }
t          j	        | 
                    |	|
          d          }t          |d          }|||k                                    z  }|t	          ||z            z  S  | j        |fi d||fi}	|                     |	d          }
t          d |
          }
t          j	        | 
                    |	|
          d          }t          |d          }|t          j	        |          }|S ||k                                    S )	a  Monte Carlo (or parametric bootstrap) p-values for gof

    currently hardcoded for A^2 only

    assumes vectorized fit_vec method,
    builds and analyses (nobs, nrep) sample in one step

    rename function to less generic

    this works also with nrep=1

    Nzusing batching requires a valuer   r+   r-   r   c                 ,    t          j        | d          S Nr-   r   expand_dimsxs    r#   r   zbootstrap.<locals>.<lambda>;  s    BN1a$8$8 rM   c                 ,    t          j        | d          S r   r   r   s    r#   r   zbootstrap.<locals>.<lambda>D  s    q! 4 4 rM   )
ValueErrorintr   ceilr   r   r:   fit_vecr   r   r9   r   rS   r   )r   rA   rW   nrepvalue
batch_sizen_batchcountirepr:   paramsrG   rV   stat_sorteds                 r#   	bootstrapr     s   , =>???bgd5#4#445566'NN 	+ 	+D%)D@@VZ,>$?@@C]]3Q]//F88&AAFgeiiV441===G7+++Ddem((***EEuWz12222 ei66t 566s++44f=='%))C00q999wQ'''='$--KEM'')))rM   c                    d}t          |          D ]i} |j        |fi d|i}|                    |          }t          j        |                    ||                    }	t          |	d          }
||
| k    z  }j|dz  |z  S )zMonte Carlo (or parametric bootstrap) p-values for gof

    currently hardcoded for A^2 only

    non vectorized, loops over all parametric bootstrap replications and calculates
    and returns specific p-value,

    rename function to less generic

    r   r+   r   r
   )r   r:   r   r   r   r9   r   )r   r   rA   rW   r   r   r   r:   r   rG   rV   s              r#   
bootstrap2r   O  s    $ Ed ! !ei....s##'%))C0011wQ'''$%- 2:rM   c                   &    e Zd ZdZddZd Zd ZdS )NewNormz-just a holder for modified distributions
    r   c                 V    |                     |          |                    |          fS r   )r   std)r   r   r   s      r#   r   zNewNorm.fit_vecp  s!    vvd||QUU4[[((rM   c                 ^    t           j                            ||d         |d                   S )Nr   r-   )locscale)r   normr9   )r   r   rA   s      r#   r9   zNewNorm.cdfs  s(    !%%aT!WDG%DDDrM   c                 n    |d         }|d         }||t           j                            |          z  z   S )Nr   r-   r+   )r   r   r:   )r   rA   r+   r   r   s        r#   r:   zNewNorm.rvsv  s8    G1gU]/333>>>>>rM   Nr   )r   r   r   r   r   r9   r:   r%   rM   r#   r   r   l  sS         ) ) ) )E E E? ? ? ? ?rM   r   __main__)stats   r   zscipy kstestr   )r    rv   rw   rx   ry   rz   r{   z
Is it correctly sized?r   r-   c                 (    g | ]}t           |         S r%   )r   )r   r   s     r#   r   r     s    666rwr{666rM   r   r   r   r   r   rf   r   rm   c                 D    t           j                            d|           S )Nr   r   )r   tr:   rW   s    r#   r   r     s    AD11 rM   r   )r   r-   )rA   rW   r   r   )gGz?gffffff?rs   )r%   r&   r'   r(   )r   r   )r%   r   r   NN)r%   r   r   );r   statsmodels.compat.pythonr   numpyr   scipy.statsr   statsmodels.tools.decoratorsr   scipy.specialr   r   r$   rL   r\   dminus_st70_uppr_   rd   rk   rq   ru   r   r~   r   r   r   r   r   r   r   r   r   r   r:   r   r   r   r   r   r   r   r   r   rW   r   r   randomrandnr   rT   r   r   r   r   btfloorastyper   
quantindexr%   rM   r#   <module>r     s   & + * * * * *     % % % % % % 7 7 7 7 7 7 . . . . . .Z Z Z~Y8 Y8 Y8 Y8|' ' ' !' ' '' ' '' ' '' ' '' ' ' 	 


 	/ 8 8 8 RQNN
L
L 	'  
	. ~= ~= ~= ~= ~= ~= ~= ~=N. . . ."   ..* .* .* .*d   :? ? ? ? ? ? ? ?& z
'++ac+
"
"C	E.	E&&f

3sFD	E$--//CCCH 6 6b$--O445555	E
$%%%''''''k$GDU3ZZ I Iiood##s3 	I 	IBBKt}}RAA!DQGHHHH	I RX66X66677F	E+x}}X..///	E*v}**1--...	E*v}**1--...	E*v|))!,,---
F116DDDDDD	77995t$d	K	K	KB$*;*;*;!<!<<==DDSIIJ	E"Z. Q rM   