
    M/Ph&                         d Z ddlmZ ddlZddlmZ ddlmZ ddl	m
Z
mZmZ ddlmZ d	 ZddZddZ ed          Z ed          Zd ZddZeZeZ eed          ZdS )u  
Implements Lilliefors corrected Kolmogorov-Smirnov tests for normal and
exponential distributions.

`kstest_fit` is provided as a top-level function to access both tests.
`kstest_normal` and `kstest_exponential` are provided as convenience functions
with the appropriate test as the default.
`lilliefors` is provided as an alias for `kstest_fit`.

Created on Sat Oct 01 13:16:49 2011

Author: Josef Perktold
License: BSD-3

pvalues for Lilliefors test are based on formula and table in

An Analytic Approximation to the Distribution of Lilliefors's Test Statistic
for Normality
Author(s): Gerard E. Dallal and Leland WilkinsonSource: The American
Statistician, Vol. 40, No. 4 (Nov., 1986), pp. 294-296
Published by: American Statistical Association
Stable URL: http://www.jstor.org/stable/2684607 .

On the Kolmogorov-Smirnov Test for Normality with Mean and Variance Unknown
Hubert W. Lilliefors
Journal of the American Statistical Association, Vol. 62, No. 318.
(Jun., 1967), pp. 399-402.

---

Updated 2017-07-23
Jacob C. Kimmel

Ref:
Lilliefors, H.W.
On the Kolmogorov-Smirnov test for the exponential distribution with mean
unknown. Journal of the American Statistical Association, Vol 64, No. 325.
(1969), pp. 387–389.
    )partialN)stats)string_like   )critical_valuesasymp_critical_valuesPERCENTILES)	TableDistc                       fd}|S )a&  
    Generates an asymptotic distribution callable from a param matrix

    Polynomial is a[0] * x**(-1/2) + a[1] * x**(-1) + a[2] * x**(-3/2)

    Parameters
    ----------
    params : ndarray
        Array with shape (nalpha, 3) where nalpha is the number of
        significance levels
    c                     t          j        dt          j        |           t          j        |           dz  g          }t          j        |                    j                            S )Nr      )nparraylogexpdotT)npolyparamss     ]/var/www/html/test/jupyter/venv/lib/python3.11/site-packages/statsmodels/stats/_lilliefors.pyfz$_make_asymptotic_function.<locals>.fA   sK    xBF1IIrvayyA~677vdhhvx(()))     )r   r   s   ` r   _make_asymptotic_functionr   4   s#    * * * * * Hr   	two_sidedr   c                 $   t          t          |                     }t          |t                    r t	          t
          j        |          j        }n t          |d          rt	          |d          }t          j
        |           }  || g|R  }t          j        d|dz             |z  |z
                                  }|t          j        d|          |z  z
                                  }|dk    r|S |dk    r|S t          j        ||g          S )a  
    Calculate statistic for the Kolmogorov-Smirnov test for goodness of fit

    This calculates the test statistic for a test of the distribution G(x) of
    an observed variable against a given distribution F(x). Under the null
    hypothesis the two distributions are identical, G(x)=F(x). The
    alternative hypothesis can be either 'two_sided' (default), 'less'
    or 'greater'. The KS test is only valid for continuous distributions.

    Parameters
    ----------
    x : array_like, 1d
        array of observations
    cdf : str or callable
        string: name of a distribution in scipy.stats
        callable: function to evaluate cdf
    alternative : 'two_sided' (default), 'less' or 'greater'
        defines the alternative hypothesis (see explanation)
    args : tuple, sequence
        distribution parameters for call to cdf


    Returns
    -------
    D : float
        KS test statistic, either D, D+ or D-

    See Also
    --------
    scipy.stats.kstest

    Notes
    -----

    In the one-sided test, the alternative is that the empirical
    cumulative distribution function of the random variable is "less"
    or "greater" than the cumulative distribution function F(x) of the
    hypothesis, G(x)<=F(x), resp. G(x)>=F(x).

    In contrast to scipy.stats.kstest, this function only calculates the
    statistic which can be used either as distance measure or to implement
    case specific p-values.
    cdfg      ?r   g        greaterless)floatlen
isinstancestrgetattrr   distributionsr   hasattrr   sortarangemax)xr   alternativeargsnobscdfvalsd_plusd_mins           r   ksstatr2   H   s   X Q==D#s "e)3//3	e		 "c5!!


Ac!mdmmmGiTAX&&-7<<>>Fryd++d227799Ei			665/"""r   normc                 N   dt          j        t                    dz  z
  }|ddd         }| dk    rdn| } | t          vrt	          d          t          |          t
          |          t          j        t                    t                    }t          j        fd	t                    D                       }|dddddf         }t          j        fd
t                    D                       }t          |ddd                   }t          ||||          }|S )a  
    Generates tables for significance levels of Lilliefors test statistics

    Tables for available normal and exponential distribution testing,
    as specified in Lilliefors references above

    Parameters
    ----------
    dist : str
        distribution being tested in set {'norm', 'exp'}.

    Returns
    -------
    lf : TableDist object.
        table of critical values
    r         Y@Nr3   normalz/Invalid dist parameter. Must be 'norm' or 'exp')dtypec                      g | ]
}|         S r   r   ).0keycv_datas     r   
<listcomp>z(get_lilliefors_table.<locals>.<listcomp>   s    @@@@@@r   c                      g | ]
}|         S r   r   )r:   r;   acv_datas     r   r=   z(get_lilliefors_table.<locals>.<listcomp>   s    FFFcHSMFFFr   )
asymptotic)
r   r   r	   r   
ValueErrorr   sortedr!   r   r
   )	distalphasizecrit_lfasym_paramsasymp_fnlfr?   r<   s	          @@r   get_lilliefors_tablerJ      s(   ( %%--E$$B$KEv~~884D?""JKKKd#G$T*H8F7OO5111Dh@@@@w@@@AAGaaa2gG(FFFFVH5E5EFFFGGK(TTrT):;;H	5$H	=	=	=BIr   )rC   r   c                     |dk    r| |dz  dz  z  } d}t          j        d| dz  z  |dz   z  d| z  t          j        |dz             z  z   dz
  d	t          j        |          z  z   d
|z  z             }|S )a`  
    Approximate pvalues for Lilliefors test

    This is only valid for pvalues smaller than 0.1 which is not checked in
    this function.

    Parameters
    ----------
    d_max : array_like
        two-sided Kolmogorov-Smirnov test statistic
    n : int or float
        sample size

    Returns
    -------
    p-value : float or ndarray
        pvalue according to approximation formula of Dallal and Wilkinson.

    Notes
    -----
    This is mainly a helper function where the calling code should dispatch
    on bound violations. Therefore it does not check whether the pvalue is in
    the valid range.

    Precision for the pvalues is around 2 to 3 decimals. This approximation is
    also used by other statistical packages (e.g. R:fBasics) but might not be
    the most precise available.

    References
    ----------
    DallalWilkinson1986
    d   r5   g\(\?gwTr   gvT5A=@gHȰ@g`80C?g}%/?g-9(?)r   r   sqrt)d_maxr   pvals      r   pval_lfrP      s    D 	3ww!d(t##6(UaZ'1w;7eObga'k&:&::;=EFrwqzz)*,3aK8 9 9D Kr   tablec                 >   t          |dd          }t          j        |           } | j        dk    r| j        d         dk    r| dddf         } n| j        dk    rt          d          t          |           }|d	k    rG| |                                 z
  |                     d
          z  }t          j
        j        }t          }nG|dk    r2| |                                 z  }t          j        j        }t          }d}nt          d          |d	k    rdnd}||k     r#t          d                    ||                    t!          ||d          }|dk    r-t#          ||          }	|	dk    r|                    ||          }	n|                    ||          }	||	fS )a  
    Test assumed normal or exponential distribution using Lilliefors' test.

    Lilliefors' test is a Kolmogorov-Smirnov test with estimated parameters.

    Parameters
    ----------
    x : array_like, 1d
        Data to test.
    dist : {'norm', 'exp'}, optional
        The assumed distribution.
    pvalmethod : {'approx', 'table'}, optional
        The method used to compute the p-value of the test statistic. In
        general, 'table' is preferred and makes use of a very large simulation.
        'approx' is only valid for normality. if `dist = 'exp'` `table` is
        always used. 'approx' uses the approximation formula of Dalal and
        Wilkinson, valid for pvalues < 0.1. If the pvalue is larger than 0.1,
        then the result of `table` is returned.

    Returns
    -------
    ksstat : float
        Kolmogorov-Smirnov test statistic with estimated mean and variance.
    pvalue : float
        If the pvalue is lower than some threshold, e.g. 0.05, then we can
        reject the Null hypothesis that the sample comes from a normal
        distribution.

    Notes
    -----
    'table' uses an improved table based on 10,000,000 simulations. The
    critical values are approximated using
    log(cv_alpha) = b_alpha + c[0] log(n) + c[1] log(n)**2
    where cv_alpha is the critical value for a test with size alpha,
    b_alpha is an alpha-specific intercept term and c[1] and c[2] are
    coefficients that are shared all alphas.
    Values in the table are linearly interpolated. Values outside the
    range are be returned as bounds, 0.990 for large and 0.001 for small
    pvalues.

    For implementation details, see  lilliefors_critical_value_simulation.py in
    the test directory.
    
pvalmethod)approxrQ   )optionsr   r   Nr   zXInvalid parameter `x`: must be a one-dimensional array-like or a single-column DataFramer3   )ddofr   rQ   z/Invalid dist parameter, must be 'norm' or 'exp'      z:Test for distribution {} requires at least {} observationsr   )r,   rT   g?)r   r   asarrayndimshaperA   r"   meanstdr   r3   r   lilliefors_table_normexponlilliefors_table_exponformatr2   rP   prob)
r+   rC   rS   r.   ztest_dlilliefors_tablemin_nobsd_ksrO   s
             r   
kstest_fitrh      s   X Z)%8: : :J 	
1Av{{qwqzQaaadG	
1 D E E 	E q66Dv~~\QUUU]]*0	L1

JKKKFNNqqHh ((.tX(>(>@ @ 	@ !V555DXtT""#::#((t44D$$T400:r   )r   r   )r3   )r3   rQ   )__doc__	functoolsr   numpyr   scipyr   statsmodels.tools.validationr   _lilliefors_critical_valuesr   r   r	   	tabledistr
   r   r2   rJ   r^   r`   rP   rh   
lillieforskstest_normalkstest_exponentialr   r   r   <module>rs      sM  & &N                 4 4 4 4 4 47 7 7 7 7 7 7 7 7 7 !            (=# =# =# =#@$ $ $ $N -,&999 --5999 ( ( (VS S S Sl 
WZe444   r   