
    M/PhN                         d Z ddlZddlmZ ddlZddlmZ ddl	m
Z
 ddlmZ d Z	 	 dd	Zdd
Z G d de          ZddZ	 	 	 	 	 	 ddZddZd Zd ZdS )z=
Created on Fri Sep 15 12:53:45 2017

Author: Josef Perktold
    N)stats)HolderTuple)Poisson)OLSc                    t          j        |          }|j        dk    rd}|dddf         }nd}g }g }t          t	          |           dz
            D ]^}| ||dz            \  }}|                    |dd||f                             d                     |                    ||z
             _t          j        |          }|r|                                }|t          j        |          fS )a  group columns into bins using sum

    This is mainly a helper function for combining probabilities into cells.
    It similar to `np.add.reduceat(x, edge_index, axis=-1)` except for the
    treatment of the last index and last cell.

    Parameters
    ----------
    edge_index : array_like
         This defines the (zero-based) indices for the columns that are be
         combined. Each index in `edge_index` except the last is the starting
         index for a bin. The largest index in a bin is the next edge_index-1.
    x : 1d or 2d array
        array for which columns are combined. If x is 1-dimensional that it
        will be treated as a 2-d row vector.

    Returns
    -------
    x_new : ndarray
    k_li : ndarray
        Count of columns combined in bin.


    Examples
    --------
    >>> dia.combine_bins([0,1,5], np.arange(4))
    (array([0, 6]), array([1, 4]))

    this aggregates to two bins with the sum of 1 and 4 elements
    >>> np.arange(4)[0].sum()
    0
    >>> np.arange(4)[1:5].sum()
    6

    If the rightmost index is smaller than len(x)+1, then the remaining
    columns will not be included.

    >>> dia.combine_bins([0,1,3], np.arange(4))
    (array([0, 3]), array([1, 2]))
       TNF   )	npasarrayndimrangelenappendsumcolumn_stacksqueeze)	
edge_indexxis_1dxliklibin_idxijx_news	            g/var/www/html/test/jupyter/venv/lib/python3.11/site-packages/statsmodels/discrete/_diagnostics_count.py_combine_binsr      s    R 	
1Av{{dAAAgJ
C
CZ1,--  'GaK/01

1QQQ!V9==##$$$

1q5OC  E  "*S//!!    	predictedc                    t          |t                    r|\  }}nd|}}|ddlm} |                    d          }|                    d          }|                    | d|           |                    |d	|           ||                    d|           |                                 |	                    d
           |                    d          }	|	                    t          j        |           d|           |	                    t          j        |          d	|           ||	                    d|           |	                                 |		                    d           |                    d          }
|
                    t          j        |          t          j        |           d           |
                    t          j        t          |                     t          |           z  t          j        t          |                     t          |           z             |
	                    d           |
                    |           |
                    |           |S )a3  diagnostic plots for comparing two lists of discrete probabilities

    Parameters
    ----------
    freq, probs_predicted : nd_arrays
        two arrays of probabilities, this can be any probabilities for
        the same events, default is designed for comparing predicted
        and observed probabilities
    label : str or tuple
        If string, then it will be used as the label for probs_predicted and
        "freq" is used for the other probabilities.
        If label is a tuple of strings, then the first is they are used as
        label for both probabilities

    upp_xlim : None or int
        If it is not None, then the xlim of the first two plots are set to
        (0, upp_xlim), otherwise the matplotlib default is used
    fig : None or matplotlib figure instance
        If fig is provided, then the axes will be added to it in a (3,1)
        subplots, otherwise a matplotlib figure instance is created

    Returns
    -------
    Figure
        The figure contains 3 subplot with probabilities, cumulative
        probabilities and a PP-plot
    freqNr   )      )figsizei7  z-o)labelz-dprobabilitiesi8  zcumulative probabilitiesi9  ozPP-plot)
isinstancelistmatplotlib.pyplotpyplotfigureadd_subplotplotset_xlimlegend	set_titler
   cumsumaranger   
set_xlabel
set_ylabel)r!   probs_predictedr%   upp_xlimfiglabel0label1pltax1ax2ax3s              r   
plot_probsr?   M   s   < % '
{''''''jjj((
//#

CHHT4vH&&&HH_d&H111Q!!!JJLLLMM/"""
//#

CHHRYt__d&H111HHRY''VH<<<Q!!!JJLLLMM,---
//#

CHHRY''4#>>>HHRYs4yy!!CII-ryT/C/Cc$ii/OPPPMM)NN6NN6Jr   c                 T   | }| j                             | j                  }|j         j        dddf         t	          j        |j        d                   k                        t                    }|4t          ||          \  }}t          ||          \  }	}|	j        d         }n||j        d         }}|}	||	z
  }
t	          j
        ||
ddddf         f          }|j        d         }t          t	          j        |          |                                          }|d|j        |j        z  z
  z  }|j         j        |j        d         z
  }||dz
  k     rddl}|                    d           |}t&          j                            ||          }t-          ||||
|d          }|S )u  
    chisquare test for predicted probabilities using cmt-opg

    Parameters
    ----------
    results : results instance
        Instance of a count regression results
    probs : ndarray
        Array of predicted probabilities with observations
        in rows and event counts in columns
    bin_edges : None or array
        intervals to combine several counts into cells
        see combine_bins

    Returns
    -------
    (api not stable, replace by test-results class)
    statistic : float
        chisquare statistic for tes
    p-value : float
        p-value of test
    df : int
        degrees of freedom for chisquare distribution
    extras : ???
        currently returns a tuple with some intermediate results
        (diff, res_aux)

    Notes
    -----

    Status : experimental, no verified unit tests, needs to be generalized
    currently only OPG version with auxiliary regression is implemented

    Assumes counts are np.arange(probs.shape[1]), i.e. consecutive
    integers starting at zero.

    Auxiliary regression drops the last column of binned probs to avoid
    that probabilities sum to 1.

    References
    ----------
    .. [1] Andrews, Donald W. K. 1988a. “Chi-Square Diagnostic Tests for
           Econometric Models: Theory.” Econometrica 56 (6): 1419–53.
           https://doi.org/10.2307/1913105.

    .. [2] Andrews, Donald W. K. 1988b. “Chi-Square Diagnostic Tests for
           Econometric Models.” Journal of Econometrics 37 (1): 135–56.
           https://doi.org/10.1016/0304-4076(88)90079-6.

    .. [3] Manjón, M., and O. Martínez. 2014. “The Chi-Squared Goodness-of-Fit
           Test for Count-Data Models.” Stata Journal 14 (4): 798–816.
    Nr   r   z!auxiliary model is rank deficientchi2)	statisticpvaluedfdiff1res_auxdistribution)model	score_obsparamsendogr
   r3   shapeastypeintr   r   r   onesfitssruncentered_tssrankwarningswarnr   rB   sfr   )resultsprobs	bin_edgesmethodresrJ   d_ind
d_ind_binsk_bins
probs_binsrF   x_auxnobsrG   	chi2_statrE   rU   rC   rD   s                      r   test_chisquare_probrd      s   j C''77IY_QQQW%5;q>)B)BBJJ3OOE*9e<<
F*9e<<
F!"%"EKNF

#EOYaaa"f677E;q>D"'$--''++--GGK'*@@@AI		ioa0	0B	FQJ9:::IZ]]9b))F

 
 
C Jr   c                       e Zd Zd ZdS )DispersionResultsc                 `    t          j        | j        | j        | j        | j        d          }|S )N)rC   rD   r[   alternative)pd	DataFramerC   rD   r[   rh   )selfframes     r   summary_framezDispersionResults.summary_frame   s9    kk+	    r   N)__name__
__module____qualname__rm    r   r   rf   rf      s#            r   rf   allFc                    |dvrt          d| d          t          | d          r| j        } | j        j        }|j        d         }|                                 }| j        dz  }||z
  }||z
  }t          j	        d|dz  
                                z            }	|
                                }
|
                                |	z  }|
|	z  }||z  
                                t          j	        d|z            z  }dt          j                            t          j        |                    z  }dt          j                            t          j        |                    z  }dt          j                            t          j        |                    z  }||g||g||gg}ddgd	dgd
dgg}||z  }t          ||                              d          }|j        d         }|j        d         }|                    ||g           |                    ddg           t          ||                              d          }|j        d         }|j        d         }|                    ||g           |                    ddg           ||z  }t          ||                              dd          }|j        d         }|j        d         }|                    ||g           |                    ddg           t          |t          j        t+          |                                                  dd          }|j        d         }|j        d         }|                    ||g           |                    ddg           t          j        |          }|r||fS t/          |dddf         |dddf         d |D             d |D             d          }|S )a  Score/LM type tests for Poisson variance assumptions

    Null Hypothesis is

    H0: var(y) = E(y) and assuming E(y) is correctly specified
    H1: var(y) ~= E(y)

    The tests are based on the constrained model, i.e. the Poisson model.
    The tests differ in their assumed alternatives, and in their maintained
    assumptions.

    Parameters
    ----------
    results : Poisson results instance
        This can be a results instance for either a discrete Poisson or a GLM
        with family Poisson.
    method : str
        Not used yet. Currently results for all methods are returned.
    _old : bool
        Temporary keyword for backwards compatibility, will be removed
        in future version of statsmodels.

    Returns
    -------
    res : instance
        The instance of DispersionResults has the hypothesis test results,
        statistic, pvalue, method, alternative, as main attributes and a
        summary_frame method that returns the results as pandas DataFrame.

    )rr   zunknown method ""_resultsr   r	   zDean Azmu (1 + a mu)zDean BzDean Cz
mu (1 + a)F)use_tzCT nb2zCT nb1HC3)cov_typerv   z
CT nb2 HC3z
CT nb1 HC3Nr   c                     g | ]
}|d          S )r   rq   .0r   s     r   
<listcomp>z+test_poisson_dispersion.<locals>.<listcomp>X  s    ...QAaD...r   c                     g | ]
}|d          S )r   rq   rz   s     r   r|   z+test_poisson_dispersion.<locals>.<listcomp>Y  s    333!1333r   zPoisson Dispersion Test)rC   rD   r[   rh   name)
ValueErrorhasattrru   rI   rL   rM   predictresid_responser
   sqrtr   r   normrW   absr   rQ   tvaluespvaluesr   rP   r   arrayrf   )rX   r[   _oldrL   rb   fittedresid2var_resid_endogvar_resid_fittedstd1var_resid_endog_sumdean_adean_bdean_cpval_dean_apval_dean_bpval_dean_cresults_alldescriptionendog_vres_ols_nb2stat_ols_nb2pval_ols_nb2res_ols_nb1stat_ols_nb1pval_ols_nb1stat_ols_hc1_nb2pval_ols_hc1_nb2stat_ols_hc1_nb1pval_ols_hc1_nb1r\   s                                  r   test_poisson_dispersionr      s   @ W5F555666w
## #"ME;q>D__F #Q&F~O71	((())D)--//!!##d*F 4'F&++--D0A0AAFejmmBF6NN333KejmmBF6NN333KejmmBF6NN333KK(K(K(*K o.o.l+-K
 &Ggv&&***77K&q)L&q)Ll3444/2333gv&&***77K&q)L&q)Ll3444,/000&Ggv&&**E*GGK"*1-"*1-(*:;<<<o6777grws7||4455995@E : G GK"*1-"*1-(*:;<<<l3444(;''K K''!!!!Q$'qqq!t$..+...33{333*   
r   Trw   c                 P   t          | d          r| j        } | j        j        }|j        d         }	|                                 }
| j        dz  }|r||z
  }n||
z
  }||
z  }|j        d         }|g}|r4| j                            | j                  }|	                    |           ||	                    |           t          |          dk    rt          j        |          }d}n
|d         }d}t          ||                              |||          }|rF|j        d         }t          j        ||          }|                    |          }|j        }|j        }nD|j        d         }	d|j        |j        z  z
  }|	|z  }t*          j                            ||          }||fS )	at  A variable addition test for the variance function

    This uses an artificial regression to calculate a variant of an LM or
    generalized score test for the specification of the variance assumption
    in a Poisson model. The performed test is a Wald test on the coefficients
    of the `exog_new_test`.

    Warning: insufficiently tested, especially for options
    ru   r   r	   r   NTF)rx   cov_kwdsrv   )r   ru   rI   rL   rM   r   r   rJ   rK   r   r   r
   r   r   rQ   eye	wald_testrC   rD   rR   rS   r   rB   rW   )rX   exog_new_testexog_new_controlinclude_score	use_endogrx   r   rv   rL   rb   r   r   	var_residr   k_constraintsex_listrJ   exuse_waldres_olsk_varsconstraintshtstat_olspval_olsrsquared_noncentereds                             r    _test_poisson_dispersion_genericr   _  s   ( w
## #"ME;q>D__F#Q&F &e^		f_	& G!'*MoG "M++GN;;	y!!!#y!!!
7||a_W%%QZ'2""Hx). # 0 0G  :!f]F33{++<9 }Q 7;w/E#EE..:===99Xr   c                 
   t          | j        t                    sddl}|                    d           | j        j        j        d         }|t          j        |df          }| j        j        }| j        j	        }| 
                                }t          j        |           }|                                 }|j        | z                      |          j        }	|j        d|z
  |z  z                      |          }
||dk    |z
  |z  dddf         z  }|                    d          }|
|	j                            |                              |	          z
  }t          j                            |          }|                    |                              |          }t          j                            |          }|j        d         }t&          j                            ||          }t-          ||||d          }|S )u  score test for zero inflation or deflation in Poisson

    This implements Jansakul and Hinde 2009 score test
    for excess zeros against a zero modified Poisson
    alternative. They use a linear link function for the
    inflation model to allow for zero deflation.

    Parameters
    ----------
    results_poisson: results instance
        The test is only valid if the results instance is a Poisson
        model.
    exog_infl : ndarray
        Explanatory variables for the zero inflated or zero modified
        alternative. I exog_infl is None, then the inflation
        probability is assumed to be constant.

    Returns
    -------
    score test results based on chisquare distribution

    Notes
    -----
    This is a score test based on the null hypothesis that
    the true model is Poisson. It will also reject for
    other deviations from a Poisson model if those affect
    the zero probabilities, e.g. in the direction of
    excess dispersion as in the Negative Binomial
    or Generalized Poisson model.
    Therefore, rejection in this test does not imply that
    zero-inflated Poisson is the appropriate model.

    Status: experimental, no verified unit tests,

    TODO: If the zero modification probability is assumed
    to be constant under the alternative, then we only have
    a scalar test score and we can use one-sided tests to
    distinguish zero inflation and deflation from the
    two-sided deviations. (The general one-sided case is
    difficult.)
    In this case the test specializes to the test by Broek

    References
    ----------
    .. [1] Jansakul, N., and J. P. Hinde. 2002. “Score Tests for Zero-Inflated
           Poisson Models.” Computational Statistics & Data Analysis 40 (1):
           75–96. https://doi.org/10.1016/S0167-9473(01)00104-9.
    r   Nz&Test is only valid if model is Poissonr   rB   )rC   rD   rE   
rank_scorerH   )r(   rI   r   rU   rV   rL   rM   r
   rP   exogr   exp
cov_paramsTdotr   linalgpinvmatrix_rankr   rB   rW   r   )results_poisson	exog_inflrU   rb   rL   r   mu	prob_zerocov_poicross_derivativecov_inflscore_obs_infl
score_inflcov_score_inflcov_score_infl_invrC   df2rE   rD   r\   s                       r   test_poisson_zeroinflation_jhr     s   b o+W55 @>??? &,Q/DGT1I&&	 !'E %D		 	 	"	"BsI((**G!s+00668I:;@@KKHEQJ)#;y"H!!!D&!QQN##A&&J 0 2 6 6w ? ? C CDT U UUN7712266zBBI
)


/
/C		BZ]]9b))F

 
 
C Jr   c                    |                                  }t          j        |           }| j        j        }|dk    |z
  |z                                  }d|z
  |z                                  |                                z
  }|t          j        |          z  }dt          j        	                    t          j
        |                    z  }t          j        	                    |          }t          j                            |          }	t          ||||	|dz  t          j        	                    |dz  d          dd          }
|
S )u`  score test for zero modification in Poisson, special case

    This assumes that the Poisson model has a constant and that
    the zero modification probability is constant.

    This is a special case of test_poisson_zeroinflation derived by
    van den Broek 1995.

    The test reports two sided and one sided alternatives based on
    the normal distribution of the test statistic.

    References
    ----------
    .. [1] Broek, Jan van den. 1995. “A Score Test for Zero Inflation in a
           Poisson Distribution.” Biometrics 51 (2): 738–43.
           https://doi.org/10.2307/2532959.

    r   r   r	   normalrC   rD   pvalue_smallerpvalue_largerrB   pvalue_chi2df_chi2rH   )r   r
   r   rI   rL   r   r   r   r   rW   r   cdfr   rB   )r   r   r   rL   score	var_scorerC   
pvalue_two
pvalue_upp
pvalue_lowr\   s              r    test_poisson_zeroinflation_broekr     s   ( 
	 	 	"	"BsI!'E zY&)388::Ei-9,1133eiikkAI	***IUZ]]26)#4#4555Jy))J	**J
! \JMM)Q,22	
 	
 	
C Jr   c                 ,   | j         j        }|                                 }t          j        |           }| j         j        dk                        t                    }|                                |                                z
  }|d|z
  z  }||z  }t          j	        
                    |j        |z  |z            }||z  }	|	|z  |	z  }
||
z
  }|t          j        |          z  }dt          j                            t          j        |                    z  }t          j                            |          }t          j                            |          }t%          |||||dz  t          j                            |dz  d          dd          }|S )u  Test for excess zeros in Poisson regression model.

    The test is implemented following Tang and Tang [1]_ equ. (12) which is
    based on the test derived in He et al 2019 [2]_.

    References
    ----------

    .. [1] Tang, Yi, and Wan Tang. 2018. “Testing Modified Zeros for Poisson
           Regression Models:” Statistical Methods in Medical Research,
           September. https://doi.org/10.1177/0962280218796253.

    .. [2] He, Hua, Hui Zhang, Peng Ye, and Wan Tang. 2019. “A Test of Inflated
           Zeros for Poisson Regression Models.” Statistical Methods in
           Medical Research 28 (4): 1157–69.
           https://doi.org/10.1177/0962280217749991.

    r   r   r	   r   r   )rI   r   r   r
   r   rL   rN   rO   r   r   invr   r   r   r   rW   r   r   r   rB   )rX   r   meanprob0countsdiffvar1pmcpmxvar2varrC   r   r   r   r\   s                    r   test_poisson_zerosr   /  s[   & 	A??DFD5MMEm!Q&..s33F::<<%))++%DAID	B
	acDj1n%%A
q&C7S=D
+Crws||#IUZ]]26)#4#4555Jy))J	**J
! \JMM)Q,22	
 	
 	
C Jr   )r   NN)NN)rr   F)NFTrw   NF)N)__doc__numpyr
   scipyr   pandasri   statsmodels.stats.baser   #statsmodels.discrete.discrete_modelr   #statsmodels.regression.linear_modelr   r   r?   rd   rf   r   r   r   r   r   rq   r   r   <module>r      sk                  . . . . . . 7 7 7 7 7 7 3 3 3 3 3 39" 9" 9"x CG< < < <~X X X Xv
 
 
 
 
 
 
 
h h h h\ E E E EPW W W Wt+ + +\. . . . .r   