
    M/PhA              	          d Z ddlmZ ddlZddlmZ  G d d          Zedk    rddl	m
Z
 dd	lmZ d%dZd&dZ ed           d Zd Zd Z ed            eee          Ze                    d ed                      ee                    dg d                      ee                    d                      ee                    d                      ee                    d                      ee                    g d e
j        g d          j        g dd                      ee                    dd                      ee                    d d!                      ee                    d g d                     g dZ e
j        d"d#g                               ej        e          j                  Z ee                    dd!geed$                      e
j        g d                               ej        e          j                  Z ee                    g deeg dd                      ee                    eg d          d         j                    	 dZ!e!rBddl"m#Z$ e%                    d e
j        d"          j&                    e$j'                     dS dS dS )'a}  Helper class for Monte Carlo Studies for (currently) statistical tests

Most of it should also be usable for Bootstrap, and for MC for estimators.
Takes the sample generator, dgb, and the statistical results, statistic,
as functions in the argument.


Author: Josef Perktold (josef-pktd)
License: BSD-3


TODOs, Design
-------------
If we only care about univariate analysis, i.e. marginal if statistics returns
more than one value, the we only need to store the sorted mcres not the
original res. Do we want to extend to multivariate analysis?

Use distribution function to keep track of MC results, ECDF, non-paramatric?
Large parts are similar to a 2d array of independent multivariate random
variables. Joint distribution is not used (yet).

I guess this is currently only for one sided test statistics, e.g. for
two-sided tests basend on t or normal distribution use the absolute value.

    )lrangeN)SimpleTablec                   n    e Zd ZdZd Zdg g fdZddZd Zdg dfdZdd	Z	ddZ
g dddfdZddZdS )
StatTestMCa  class to run Monte Carlo study on a statistical test'''

    TODO
    print(summary, for quantiles and for histogram
    draft in trying out script log

    Parameters
    ----------
    dgp : callable
        Function that generates the data to be used in Monte Carlo that should
        return a new sample with each call
    statistic : callable
        Function that calculates the test statistic, which can return either
        a single statistic or a 1d array_like (tuple, list, ndarray).
        see also statindices in description of run

    Attributes
    ----------
    many methods store intermediate results

    self.mcres : ndarray (nrepl, nreturns) or (nrepl, len(statindices))
        Monte Carlo results stored by run


    Notes
    -----

    .. Warning::
       This is (currently) designed for a single call to run. If run is
       called a second time with different arguments, then some attributes might
       not be updated, and, therefore, not correspond to the same run.

    .. Warning::
       Under Construction, do not expect stability in Api or implementation


    Examples
    --------

    Define a function that defines our test statistic:

    def lb(x):
        s,p = acorr_ljungbox(x, lags=4)
        return np.r_[s, p]

    Note lb returns eight values.

    Define a random sample generator, for example 500 independently, normal
    distributed observations in a sample:


    def normalnoisesim(nobs=500, loc=0.0):
        return (loc+np.random.randn(nobs))

    Create instance and run Monte Carlo. Using statindices=list(range(4)) means that
    only the first for values of the return of the statistic (lb) are stored
    in the Monte Carlo results.

    mc1 = StatTestMC(normalnoisesim, lb)
    mc1.run(5000, statindices=list(range(4)))

    Most of the other methods take an idx which indicates for which columns
    the results should be presented, e.g.

    print(mc1.cdf(crit, [1,2,3])[1]
    c                 "    || _         || _        d S N)dgp	statistic)selfr	   r
   s      a/var/www/html/test/jupyter/venv/lib/python3.11/site-packages/statsmodels/sandbox/tools/mctools.py__init__zStatTestMC.__init__d   s    "    Nc                 N   || _         || _        || _        || _        | j        }| j        } | || g|R  t          t          j                            x| _	        }|Bt          j
        |          }|d<   t          d|dz
  |          D ]}	 || }
 ||
g|R  ||	<   nyt          |          x| _	        }t          j
        ||f          x| _        }fd|D             |d<   t          d|dz
            D ]!}	 || }
 ||
g|R  fd|D             ||	<   "|| _        dS )a  run the actual Monte Carlo and save results

        Parameters
        ----------
        nrepl : int
            number of Monte Carlo repetitions
        statindices : None or list of integers
           determines which values of the return of the statistic
           functions are stored in the Monte Carlo. Default None
           means the entire return. If statindices is a list of
           integers, then it will be used as index into the return.
        dgpargs : tuple
           optional parameters for the DGP
        statsargs : tuple
           optional parameters for the statistics function

        Returns
        -------
        None, all results are attached


        Nr      c                      g | ]
}|         S  r   ).0imcres0s     r   
<listcomp>z"StatTestMC.run.<locals>.<listcomp>   s    777aq	777r   c                      g | ]
}|         S r   r   )r   r   rets     r   r   z"StatTestMC.run.<locals>.<listcomp>   s    999SV999r   )nreplstatindicesdgpargs	statsargsr	   r
   lennpravelnreturnzerosrangemcres)r   r   r   r   r   r	   statfunnreturnsr#   iixr   r   s              @@r   runzStatTestMC.runh   s   . 
&"h. g3333"%bhv&6&6"7"77x HUOOEE!HAuQw11 3 3CM#GA2	222b		3 '*+&6&66DL8!#5(*;!<!<<DJ7777;777E!HAuQw'' : :CMga,),,,9999[999b		


r   c                    | j         j        dk    r#|| j         dd|f         }nt          d          | j         }|t          j        |d          }n|d         t          j         k    s*t          j        t          j         |t          j        f         }|d         t          j         k    st          j        |t          j        f         }t          j        |t          j        t          j         |t          j        f                   }|| _        t          j        |d                   dz  | j	        z  | _
        t          j        |d         ddd                   ddd         dz  | j	        z  | _        || j
        | j        fS )	zcalculate histogram values

        does not do any plotting

        I do not remember what I wanted here, looks similar to the new cdf
        method, but this also does a binned pdf (self.histo)


           N$currently only 1 statistic at a time
   )binsr   g      ?)r#   ndim
ValueErrorr   	histograminfr_histocumsumr   cumhistocumhistoreversed)r   idxcritvalr#   r4   r-   s         r   r1   zStatTestMC.histogram   sU    :?a
111S5) !GHHHJE?LR000EE1:"&((UBF7GRV341:"&((U4<(L&(eRVGWbf,D&EG G GE 
	%(++B.tz9 "	%(44R4. 9 9$$B$ ? B4: MdmT%:::r   c                 p    t          | d          s t          j        | j        d          | _        | j        S )N	mcressortr   )axis)hasattrr   sortr#   r;   )r   s    r   get_mc_sortedzStatTestMC.get_mc_sorted   s4    t[)) 	9WTZa888DN~r   g{Gz?g?g?g?g333333?c                 <   | j         j        dk    r#|| j         dd|f         }nt          d          | j         }t          j        |          x| _        }|                                 dd|f         }||| j        |z                      t                             fS )a#  calculate quantiles of Monte Carlo results

        similar to ppf

        Parameters
        ----------
        idx : None or list of integers
            List of indices into the Monte Carlo results (columns) that should
            be used in the calculation
        frac : array_like, float
            Defines which quantiles should be calculated. For example a frac
            of 0.1 finds the 10% quantile, x such that cdf(x)=0.1

        Returns
        -------
        frac : ndarray
            same values as input, TODO: I should drop this again ?
        quantiles : ndarray, (len(frac), len(idx))
            the quantiles with frac in rows and idx variables in columns

        Notes
        -----

        rename to ppf ? make frac required
        change sequence idx, frac


        r*   Nr+   )
r#   r/   r0   r   asarrayfracr?   r   astypeint)r   r8   rC   r#   	mc_sorteds        r   	quantileszStatTestMC.quantiles   s    < :?a
111S5) !GHHHJE:d+++	D&&((3/	Y
477<<===r   c                    t          j        |                                          }|                                 }t          j        |          }|j        dk    r!|j        d         t          |          k    rd}nd}|}g }t          |          D ]Z\  }}|r|dd|f         }|	                    t          j
        |dd|f         |          t          | j                  z             [t          j        |          j        }||fS )a  calculate cumulative probabilities of Monte Carlo results

        Parameters
        ----------
        idx : None or list of integers
            List of indices into the Monte Carlo results (columns) that should
            be used in the calculation
        frac : array_like, float
            Defines which quantiles should be calculated. For example a frac
            of 0.1 finds the 10% quantile, x such that cdf(x)=0.1

        Returns
        -------
        x : ndarray
            same as input, TODO: I should drop this again ?
        probs : ndarray, (len(x), len(idx))
            the quantiles with frac in rows and idx variables in columns



        r   TFN)r   
atleast_1dtolistr?   rB   r/   shaper   	enumerateappendsearchsortedfloatr   T)	r   r'   r8   rF   use_xix_probsr   ixs	            r   cdfzStatTestMC.cdf   s   , mC  '')) &&((	JqMM6A::!'!*c#hh..FFFcNN 	Q 	QDAb qqqsVLL111R4"==eDJ>O>OOPPPP
5!!#%xr   2   c                    |i i f}| j         j        dk    r#|| j         dd|f         }nt          d          | j         }t          j        |                                |                                d          }ddlm}  |j	                    }	 |j
        |f|dd|d           |j        | ||          dfi |d	          dS )
a  plot the histogram against a reference distribution

        Parameters
        ----------
        idx : None or list of integers
            List of indices into the Monte Carlo results (columns) that should
            be used in the calculation
        distpdf : callable
            probability density function of reference distribution
        bins : {int, array_like}
            used unchanged for matplotlibs hist call
        ax : TODO: not implemented yet
        kwds : None or tuple of dicts
            extra keyword options to the calls to the matplotlib functions,
            first dictionary is for his, second dictionary for plot of the
            reference distribution

        Returns
        -------
        None


        Nr*   r+   d   r   T)r-   normedrr   )r#   r/   r0   r   linspaceminmaxmatplotlib.pyplotpyplotfigurehistplot)
r   r8   distpdfr-   axkwdsr#   lsppltfigs
             r   	plot_histzStatTestMC.plot_hist&  s    0 <r7D:?a
111S5) !GHHHJEk%))++uyy{{C88 	('''''
 cjll:T$::$q':::ggcllC334733333r   c                 2   t          j        |          }|                     ||          \  }} |t          j        |          j                  }g }	t          |          D ]0\  }
}|	                    |dd|
f         |dd|
f         g           1t          j        |g|	z             }|r|dz   }nd}|'d t          |j	        d         dz            D             }dgd	 |D             z   }t          |d
dgdg|j	        d         dz
  z  z   i||          S )a  summary table for quantiles (critical values)

        Parameters
        ----------
        idx : None or list of integers
            List of indices into the Monte Carlo results (columns) that should
            be used in the calculation
        distppf : callable
            probability density function of reference distribution
            TODO: use `crit` values instead or additional, see summary_cdf
        frac : array_like, float
            probabilities for which
        varnames : None, or list of strings
            optional list of variable names, same length as idx

        Returns
        -------
        table : instance of SimpleTable
            use `print(table` to see results

        )rC   Nz Quantiles (critical values)zQuantiles (critical values)c                     g | ]}d |z  S zvar%dr   r   r   s     r   r   z0StatTestMC.summary_quantiles.<locals>.<listcomp>~  s    FFF!FFFr   r   r*   z
probc                 &    g | ]}d D ]	}| d| 
S ))mcdist
r   )r   r   ts      r   r   z0StatTestMC.summary_quantiles.<locals>.<listcomp>  s/    VVV~VV!1VVVVr   	data_fmts%#6.3f%#10.4ftxt_fmttitleheaders)r   rI   rG   
atleast_2drP   rL   extendcolumn_stackr"   rK   r   )r   r8   distppfrC   varnamesrx   quantmcqcritmmlr   rT   mmlarry   s                 r   summary_quantileszStatTestMC.summary_quantilesU  sR   . mC  ^^Cd^33
s wr}U++-..s^^ 	. 	.EArJJAAAaC$qqqs),----#.. 	099EE/EFFU5;q>13D-E-EFFFH*VVXVVVV5#.
I;TUWXHX;Y0Y"Z %")+ + + 	+r   c           	      ,   t          j        |          }g }t          t          |                    D ]T}|                    |                     |dd|f         ||         g          d                                                    Ut          j        |g|z             }|r|dz   }nd}|'d t          |j        d         dz
            D             }dg|z   }	t          |ddgd	gt          j
        |          j        d         dz
  z  z   i||	
          S )am  summary table for cumulative density function


        Parameters
        ----------
        idx : None or list of integers
            List of indices into the Monte Carlo results (columns) that should
            be used in the calculation
        frac : array_like, float
            probabilities for which
        crit : array_like
            values for which cdf is calculated
        varnames : None, or list of strings
            optional list of variable names, same length as idx

        Returns
        -------
        table : instance of SimpleTable
            use `print(table` to see results


        Nr   z ProbabilitesProbabilitiesc                     g | ]}d |z  S rl   r   rm   s     r   r   z*StatTestMC.summary_cdf.<locals>.<listcomp>  s    EEE!EEEr   probrs   rt   ru   rv   )r   rI   r"   r   rM   rU   r   r|   rK   r   array)
r   r8   rC   r   r~   rx   r   r   r   ry   s
             r   summary_cdfzStatTestMC.summary_cdf  s4   . mC   s3xx 	A 	AAJJtxxQQQqS	CF844Q7==??@@@@ -- 	"?*EE!E EEU5;q>!3C-D-DEEEH(X%5#.
I;QTH[\]H^_`H`;a0a"b %")+ + + 	+r   )NNr   )NrV   NN)__name__
__module____qualname____doc__r   r(   r1   r?   rG   rU   ri   r   r   r   r   r   r   r       s        A AF# # # &*2 6 6 6 6r; ; ; ;D   !'F'F'F )> )> )> )>V0 0 0 0d,4 ,4 ,4 ,4^ 4S3R3R#'t.+ .+ .+ .+`1+ 1+ 1+ 1+ 1+ 1+r   r   __main__)statsacorr_ljungboxrX           c                 j    |t           j                            |           z                                   S r   )r   randomrandnr5   )nobsdrifts     r   randwalksimr     s'    biood+++33555r     c                 F    |t           j                            |           z   S r   )r   r   r   )r   locs     r   normalnoisesimr     s    BIOOD)))*r   z

Ljung Boxc                 L    t          | dd          \  }}|d         |d         fS )N   Tlags	return_dfr.   r   r'   sps      r   lb4r     s,    QQ$777!uae|r   c                 L    t          | dd          \  }}|d         |d         fS )Nr   Tr   r   r   r   s      r   lb1r     s,    QQ$777!tQqTzr   c                 T    t          | dd          \  }}t          j        ||f         S )Nr   Tr   )r   r   r3   r   s      r   lbr     s*    QQ$777!uQT{r   zResults with MC classi'     )r   r   r@   )r9   )r   r*      )r*   r   r   )zlag 1zlag 2zlag 3r   )r~   rx   gNё\C?g9#J?r   r*   r   )rx   )rX   r   )r   r   )(r   statsmodels.compat.pythonr   numpyr   statsmodels.iolib.tabler   r   r   scipyr   statsmodels.stats.diagnosticr   r   r   printr   r   r   mc1r(   r1   rG   r   chi2ppfrU   rC   rz   rP   r   r   rK   doplotr^   r_   rg   ri   pdfshowr   r   r   <module>r      s   2 - , , , , ,     / / / / / /V+ V+ V+ V+ V+ V+ V+ V+@ z;;;;;;6 6 6 6+ + + + 
E-       
E
!"""
*^R
(
(CGGEvvayyG)))	E#--#B#B#B-
C
CDDD	E#--

	E#--

	E#--

 
E#

GGG)<)<)@)D)D)D&6   8 8 9 9 9 
E#''&!

	E#''&!

	E#''&'''
"
"###***D5:qe  t!4!4!677D	E#//1Q%t3C/
D
DEEE5:ggg""=2=#6#6#899D	E#//'''4#>#>#> 0  2 2 3 3 3 
E#''$
 
 
#
)*** F ''''''a

1)***




Q J r   