
    M/Phm#                         d Z ddlZddlZddlmZ ddlmZ ddlm	Z	m
Z
mZmZmZmZ  G d d          Z G d d	e          ZdS )
zM
Created on Wed Nov 18 15:17:58 2020

Author: Josef Perktold
License: BSD-3

    N)cache_readonly)test_chisquare_binning)test_poisson_dispersiontest_poisson_zeroinflation_jh test_poisson_zeroinflation_broektest_poisson_zerostest_chisquare_prob
plot_probsc                   D    e Zd ZdZddZed             Zd	dZ	 	 d
dZdS )CountDiagnostica?  Diagnostic and specification tests and plots for Count model

    status: experimental

    Parameters
    ----------
    results : Results instance of a count model.
    y_max : int
        Largest count to include when computing predicted probabilities for
        counts. Default is the largest observed count.

    Nc                 "    || _         || _        d S N)resultsy_max)selfr   r   s      _/var/www/html/test/jupyter/venv/lib/python3.11/site-packages/statsmodels/discrete/diagnostic.py__init__zCountDiagnostic.__init__+   s    


    c                 z    | j         dt          j        | j         dz             i}ni } | j        j        dddi|S )Ny_values   whichprob )r   nparanger   predict)r   kwdss     r   probs_predictedzCountDiagnostic.probs_predicted/   sK    :!	$*q. 9 9:DDD#t|#99&9D999r   c                     i }| t          j        |d         dz             |d<    | j        j        dddi|}t	          | j        |||          }|S )	a*  Moment test for binned probabilites using OPG.

        Paramters
        ---------
        binedges : array_like or None
            This defines which counts are included in the test on frequencies
            and how counts are combined in bins.
            The default if bin_edges is None will change in future.
            See Notes and Example sections below.
        method : str
            Currently only `method = "opg"` is available.
            If method is None, the OPG will be used, but the default might
            change in future versions.
            See Notes section below.

        Returns
        -------
        test result

        Notes
        -----
        Warning: The current default can have many empty or nearly empty bins.
        The default number of bins is given by max(endog).
        Currently it is recommended to limit the number of bins explicitly,
        see Examples below.
        Binning will change in future and automatic binning will be added.

        Currently only the outer product of gradient, OPG, method is
        implemented. In many case, the OPG version of a specification test
        overrejects in small samples.
        Specialized tests that use observed or expected information matrix
        often have better small sample properties.
        The default method will change if better methods are added.

        Examples
        --------
        The following call is a test for the probability of zeros
        `test_chisquare_prob(bin_edges=np.arange(3))`

        `test_chisquare_prob(bin_edges=np.arange(10))` tests the hypothesis
        that the frequencies for counts up to 7 correspond to the estimated
        Poisson distributions.
        In this case, edges are 0, ..., 9 which defines 9 bins for
        counts 0 to 8. The last bin is dropped, so the joint test hypothesis is
        that the observed aggregated frequencies for counts 0 to 7 correspond
        to the model prediction for those frequencies. Predicted probabilites
        Prob(y_i = k | x) are aggregated over observations ``i``.

        Nr   r   r   r   )	bin_edgesmethodr   )r   r   r   r   r	   )r   r"   r#   r   probsress         r   r	   z#CountDiagnostic.test_chisquare_prob7   sp    d  !y2):;;D$$::6:T::!$,)/1 1 1
r   	predictedc                 
   | j                             d          }t          |          }t          j        | j        j        j                            t                    |          d|         }t          |||||          }|S )zFPlot observed versus predicted frequencies for entire sample.
        r   )	minlengthN)labelupp_xlimfig)r   sumlenr   bincountr   modelendogastypeintr
   )r   r)   r*   r+   r   k_probsfreqs          r   r
   zCountDiagnostic.plot_probsr   s     .22155o&&{4<-3::3??%,. . ..6wh8$x " " " 
r   r   )NN)r&   NN)	__name__
__module____qualname____doc__r   r   r   r	   r
   r   r   r   r   r      s~             : : ^:9 9 9 9v 6:     r   r   c                   4    e Zd ZdZd Zd ZddZ	 	 	 ddZdS )PoissonDiagnosticzDiagnostic and specification tests and plots for Poisson model

    status: experimental

    Parameters
    ----------
    results : PoissonResults instance

    c                     || _         d S r   )r   )r   r   s     r   _init__zPoissonDiagnostic._init__   s    r   c                 .    t          | j                  }|S )z{Test for excess (over or under) dispersion in Poisson.

        Returns
        -------
        dispersion results
        )r   r   )r   r%   s     r   test_dispersionz!PoissonDiagnostic.test_dispersion   s     &dl33
r   r   Nc                 $   |dk    r+|t          j        d           t          | j                  }n^|dk    rX|t	          | j                  }nAt          j        |          }|j        dk    r|dddf         }t          | j        |          }|S )a  Test for excess zeros, zero inflation or deflation.

        Parameters
        ----------
        method : str
            Three methods ara available for the test:

             - "prob" : moment test for the probability of zeros
             - "broek" : score test against zero inflation with or without
                explanatory variables for inflation

        exog_infl : array_like or None
            Optional explanatory variables under the alternative of zero
            inflation, or deflation. Only used if method is "broek".

        Returns
        -------
        results

        Notes
        -----
        If method = "prob", then the moment test of He et al 1_ is used based
        on the explicit formula in Tang and Tang 2_.

        If method = "broek" and exog_infl is None, then the test by Van den
        Broek 3_ is used. This is a score test against and alternative of
        constant zero inflation or deflation.

        If method = "broek" and exog_infl is provided, then the extension of
        the broek test to varying zero inflation or deflation by Jansakul and
        Hinde is used.

        Warning: The Broek and the Jansakul and Hinde tests are not numerically
        stable when the probability of zeros in Poisson is small, i.e. if the
        conditional means of the estimated Poisson distribution are large.
        In these cases, p-values will not be accurate.
        r   Nz*exog_infl is only used if method = "broek"broekr   )	exog_infl)	warningswarnr   r   r   r   asarrayndimr   )r   r#   rA   r%   s       r   test_poisson_zeroinflationz,PoissonDiagnostic.test_poisson_zeroinflation   s    L V$JKKK$T\22CCw 6t|DDJy11	>Q&& )!!!T' 2I3DL>GI I I 
r   
   	quicksort皙?皙?c           
         || j                             d          }| j         j        j        }| j                             d          }	|dddf         t	          j        |	j        d                   k                        t                    }
|Xt          |          }||

                    d                              d          z
  }t	          j        |||z  k               dz
  }|	ddd|f         }	|
ddd|f         }
|	dddfxx         d|	
                    d          z
  z  cc<   |
dddfxx         d|

                    d          z
  z  cc<   t          |
|	|||d||	          }|S )
a  Hosmer-Lemeshow style test for count data.

        Note, this does not take into account that parameters are estimated.
        The distribution of the test statistic is only an approximation.

        This corresponds to the Hosmer-Lemeshow type test for an ordinal
        response variable. The outcome space y = k is partitioned into bins
        and treated as ordinal variable.
        The observations are split into approximately equal sized groups
        of observations sorted according the ``sort_var``.

        Nlin)r   r   r   r   T)sort_varbinsdforderedsort_methodalpha_nc)r   r   r/   r0   r   r   shaper1   r2   r-   r,   cumsumargmaxr   )r   rN   rO   k_maxrP   rR   frac_upprS   r0   expectedcountsnobsicumcounts_sumr%   s                 r   _chisquare_binnedz#PoissonDiagnostic._chisquare_binned   s     |++%+88H"(
 <''f'554.BIhnQ.?$@$@@HHMM =u::D!FJJqMM$8$8$;$;;NInth>??!CEAAAvvI&6E6	" 	B1x||A..qqq"uVZZ]]** %VX*.2t1<.68 8 8 
r   )r   N)NrG   NNrH   rI   rJ   )r5   r6   r7   r8   r<   r>   rF   r]   r   r   r   r:   r:      sr             4 4 4 4l HL<?#', , , , , ,r   r:   )r8   rB   numpyr   statsmodels.tools.decoratorsr    statsmodels.stats.diagnostic_genr   'statsmodels.discrete._diagnostics_countr   r   r   r   r	   r
   r   r:   r   r   r   <module>rb      s         7 7 7 7 7 7                    ` ` ` ` ` ` ` `Fz z z z z z z z z zr   