
    M/Ph_                        d Z ddlZddlmZ ddlZddlZddlm	Z	 ddl
mZ ddlmZmZmZmZ ddlmc mZ ddlmc mZ ddlmZ  G d d	e          Z G d
 de          Z G d dej                  Z ej        ee           dS )zL
Created on Sat Aug 22 20:24:42 2015

Author: Josef Perktold
License: BSD-3
    N)Appender)CategoricalDtype)stats)ModelLikelihoodModelGenericLikelihoodModelGenericLikelihoodModelResults)cache_readonlyc                   `    e Zd ZdZej        Zd fd	Zd ZddZ	e
d fd	            Zej        j        ej        _        d Zd	 Zd
 Zd Zd ZddZddZd Z eej        j                  d             Zd Zd Zed             Z eej        j                  	 	 d fd	            Z xZS )OrderedModelac  Ordinal Model based on logistic or normal distribution

    The parameterization corresponds to the proportional odds model in the
    logistic case.
    The model assumes that the endogenous variable is ordered but that the
    labels have no numeric interpretation besides the ordering.

    The model is based on a latent linear variable, where we observe only a
    discretization.

    y_latent = X beta + u

    The observed variable is defined by the interval

    y = {0 if y_latent <= cut_0
         1 of cut_0 < y_latent <= cut_1
         ...
         K if cut_K < y_latent

    The probability of observing y=k conditional on the explanatory variables
    X is given by

    prob(y = k | x) = Prob(cut_k < y_latent <= cut_k+1)
                    = Prob(cut_k - x beta < u <= cut_k+1 - x beta
                    = F(cut_k+1 - x beta) - F(cut_k - x beta)

    Where F is the cumulative distribution of u which is either the normal
    or the logistic distribution, but can be set to any other continuous
    distribution. We use standardized distributions to avoid identifiability
    problems.

    Parameters
    ----------
    endog : array_like
        Endogenous or dependent ordered categorical variable with k levels.
        Labels or values of endog will internally transformed to consecutive
        integers, 0, 1, 2, ...
        pd.Series with ordered Categorical as dtype should be preferred as it
        gives the order relation between the levels.
        If endog is not a pandas Categorical, then categories are
        sorted in lexicographic order (by numpy.unique).
    exog : array_like
        Exogenous, explanatory variables. This should not include an intercept.
        pd.DataFrame are also accepted.
        see Notes about constant when using formulas
    offset : array_like
        Offset is added to the linear prediction with coefficient equal to 1.
    distr : string 'probit' or 'logit', or a distribution instance
        The default is currently 'probit' which uses the normal distribution
        and corresponds to an ordered Probit model. The distribution is
        assumed to have the main methods of scipy.stats distributions, mainly
        cdf, pdf and ppf. The inverse cdf, ppf, is only use to calculate
        starting values.

    Notes
    -----
    Status: experimental, core results are verified, still subclasses
    `GenericLikelihoodModel` which will change in future versions.

    The parameterization of OrderedModel requires that there is no constant in
    the model, neither explicit nor implicit. The constant is equivalent to
    shifting all thresholds and is therefore not separately identified.

    Patsy's formula specification does not allow a design matrix without
    explicit or implicit constant if there are categorical variables (or maybe
    splines) among explanatory variables. As workaround, statsmodels removes an
    explicit intercept.

    Consequently, there are two valid cases to get a design matrix without
    intercept when using formulas:

    - specify a model without explicit and implicit intercept which is possible
      if there are only numerical variables in the model.
    - specify a model with an explicit intercept which statsmodels will remove.

    Models with an implicit intercept will be overparameterized, the parameter
    estimates will not be fully identified, cov_params will not be invertible
    and standard errors might contain nans. The computed results will be
    dominated by numerical imprecision coming mainly from convergence tolerance
    and numerical derivatives.

    The model will raise a ValueError if a remaining constant is detected.

    Nprobitc                 n   |dk    rt           j        | _        n|dk    rt           j        | _        n|| _        |t	          j        |          }|| _        |                     ||          \  }}} t                      j	        ||fi | d }|s| j
        j        dk    r_t	          j        | j
        d          \  }	}
|
| _
        |	}t	          j        |                                          rd}t          |          nC| j
        j        dk    r3t!          | d          st          d	          | j
        j        d         }g }| j        d
k    rt          d          |                     ||           | j        dz
  | _        | j        | _        | j        | j        | j        z   z
  | _        t4          | _        d S )Nr   logit   T)return_inversezFNaN in dependent variable detected. Missing values need to be removed.   design_infoz2-dim endog not supportedr   z+There should not be a constant in the model)k_levels)r   normdistrlogisticnpasarrayoffset_check_inputssuper__init__endogndimuniqueisnanany
ValueErrorhasattrshape
k_constant_initialize_labelsr   k_extrak_varsdf_modelnobsdf_residOrderedResultsresults_class)selfr   exogr   r   kwdslabels	is_pandasr   r    indexmsg	__class__s               d/var/www/html/test/jupyter/venv/lib/python3.11/site-packages/statsmodels/miscmodels/ordinal_model.pyr   zOrderedModel.__init__t   s   HDJJgDJJDJZ''F#'#5#5eT#B#B vy----- 	z!## "	$*T J J J"
8F##'')) *@C$S//)* A%%t]33 B$%@AAA  :+A. ?QJKKK::: }q(	T[4<%?@+    c                    t          | j        t          j                  s#| j        j         d}t          j        |           d}d}t          |t          j                  rt          |j	        t                    ru|j        j        st          j        dt                     |j        }|j        j        }|j        j        }|                                dk    rt'          d          ||_        d}|||fS )a  Handle endog that is pandas Categorical.

        Checks if self.distrib is legal and provides Pandas ordered Categorical
        support for endog.

        Parameters
        ----------
        endog : array_like
            Endogenous, dependent variable, 1-D.
        exog : array_like
            Exogenous, explanatory variables.
            Currently not used.

        Returns
        -------
        endog : array_like or pandas Series
            If the original endog is a pandas ordered Categorical Series,
            then the returned endog are the ``codes``, i.e. integer
            representation of ordere categorical variable
        labels : None or list
            If original endog is pandas ordered Categorical Series, then the
            categories are returned. Otherwise ``labels`` is None.
        is_pandas : bool
            This is True if original endog is a pandas ordered Categorical
            Series and False otherwise.

        z# is not a scipy.stats distribution.NFznthe endog has ordered == False, risk of capturing a wrong order for the categories. ordered == True preferred.z5missing values in categorical endog are not supportedT)
isinstancer   r   rv_continuousnamewarningswarnpdSeriesdtypesr   dtypeorderedWarningvalues
categoriescatcodesminr#   )r/   r   r0   r5   r2   r3   
endog_names          r7   r   zOrderedModel._check_inputs   s    : $*e&9:: 	:?GGG  M#	eRY'' 	!%,(899 !{* +M #K #*+ + +
 #Z
0	99;;"$$$ &5 6 6 6'
 	fi''r8   c                    || _         |t          |          | _        n|| _        | j        | j        j        \  | _        | _        n| j        j        d         dc| _        | _        d t          |d d         |dd                    D             }| j        Ht          | j	                  | j        k    rt          d          | j	                            |           d S || j        _        d S )Nr   c                 X    g | ]'\  }}t          |          d z   t          |          z   (S )/)str).0xys      r7   
<listcomp>z3OrderedModel._initialize_labels.<locals>.<listcomp>   sG     E E E#q! q66C<#a&&0 E E Er8   r:   r   z)something wrong with exog_names, too long)r2   lenr   r0   r%   r+   r)   r   zip
exog_namesRuntimeErrorextenddataxnames)r/   r2   r   threshold_namess       r7   r'   zOrderedModel._initialize_labels   s    KKDMM$DM9 %)Y_"DIt{{%)Z%5a%8!"DIt{E E'*6#2#;qrr
'C'CE E E 9 4?##dk11"#NOOOO""?33333.DIr8   c                    |                     d          d                                         }||         } t                      j        |g|R |dgd|}	|	j        j        dk    rt          |j        t                    r|j        j	        sd}
t          |
          |j        j        }|	                    |           |	j                            d          |	_        ||	j        _        |	S )N~r   	Intercept)rY   	drop_colsr   zBOnly ordered pandas Categorical are supported as endog in formulasr   )splitstripr   from_formular   r   r;   rC   r   rD   r#   rF   rG   r'   argmaxrY   ynames)clsformularY   subsetr_   argskwargsrK   original_endogmodelr5   r2   r6   s               r7   rb   zOrderedModel.from_formula   s    ]]3''*0022
j)$$J:>J J;-J JBHJ J ;q  ~35EFF &&,4&+ oo%#*5F$$V,,,+,,Q//EK *EJr8   c                 6    | j                             |          S )ag  Cdf evaluated at x.

        Parameters
        ----------
        x : array_like
            Points at which cdf is evaluated. In the model `x` is the latent
            variable plus threshold constants.

        Returns
        -------
        Value of the cumulative distribution function of the underlying latent
        variable evaluated at x.
        )r   cdfr/   rQ   s     r7   rm   zOrderedModel.cdf       z~~a   r8   c                 6    | j                             |          S )ab  Pdf evaluated at x

        Parameters
        ----------
        x : array_like
            Points at which cdf is evaluated. In the model `x` is the latent
            variable plus threshold constants.

        Returns
        -------
        Value of the probability density function of the underlying latent
        variable evaluated at x.
        )r   pdfrn   s     r7   rq   zOrderedModel.pdf#  ro   r8   c                 ~    t          j        |                     |          |                     |          z
  d          S )a  Interval probability.

        Probability that value is in interval (low, upp], computed as

            prob = cdf(upp) - cdf(low)

        Parameters
        ----------
        low : array_like
            lower bound for interval
        upp : array_like
            upper bound for interval

        Returns
        -------
        float or ndarray
            Probability that value falls in interval (low, upp]

        r   )r   maximumrm   )r/   lowupps      r7   probzOrderedModel.prob3  s/    ( z$((3--$((3--7;;;r8   c                    || j         dz
   d         }t          j        |dd         t          j        |dd                   f                                          }t          j        t          j         g|t          j        gf          }|S )aW  transformation of the parameters in the optimization

        Parameters
        ----------
        params : nd_array
            Contains (exog_coef, transformed_thresholds) where exog_coef are
            the coefficient for the explanatory variables in the linear term,
            transformed threshold or cutoff points. The first, lowest threshold
            is unchanged, all other thresholds are in terms of exponentiated
            increments.

        Returns
        -------
        thresh : nd_array
            Thresh are the thresholds or cutoff constants for the intervals.

        r   N)r   r   concatenateexpcumsuminf)r/   params	th_paramsthreshs       r7   transform_threshold_paramsz'OrderedModel.transform_threshold_paramsI  s    $ T]Q./001	2A2!#	!""!6!6!8 9 99? 	26'FRVH =>>r8   c           
          t          j        |dd         t          j        t          j        |dd                             f          }|S )aG  obtain transformed thresholds from original thresholds or cutoffs

        Parameters
        ----------
        params : ndarray
            Threshold values, cutoff constants for choice intervals, which
            need to be monotonically increasing.

        Returns
        -------
        thresh_params : ndarrray
            Transformed threshold parameter.
            The first, lowest threshold is unchanged, all other thresholds are
            in terms of exponentiated increments.
            Transformed parameters can be any real number without restrictions.

        Nr   r:   )r   rx   logdiff)r/   r|   thresh_paramss      r7   "transform_reverse_threshold_paramsz/OrderedModel.transform_reverse_threshold_paramsa  sL    $ rr
(*rwvcrc{/C/C(D(D(F G Gr8   rv   c                 H   |                      |          }|                     |||          }|dk    r|S |dddf         }|dd         |z
  }|dd         |z
  }|dk    r|                     ||          }	|	S |dv r|                     |          }
|
S t	          d          )	a  
        Predicted probabilities for each level of the ordinal endog.

        Parameters
        ----------
        params : ndarray
            Parameters for the Model, (exog_coef, transformed_thresholds).
        exog : array_like, optional
            Design / exogenous data. If exog is None, model exog is used.
        offset : array_like, optional
            Offset is added to the linear prediction with coefficient
            equal to 1. If offset is not provided and exog
            is None, uses the model's offset if present.  If not, uses
            0 as the default value.
        which : {"prob", "linpred", "cumprob"}
            Determines which statistic is predicted.

            - prob : predicted probabilities to be in each choice. 2-dim.
            - linear : 1-dim linear prediction of the latent variable
              ``x b + offset``
            - cumprob : predicted cumulative probability to be in choice k or
              lower

        Returns
        -------
        predicted values : ndarray
            If which is "prob", then 2-dim predicted probabilities with
            observations in rows and one column for each category or level of
            the categorical dependent variable.
            If which is "cumprob", then "prob" ar cumulatively added to get the
            cdf at k, i.e. probability of observing choice k or lower.
            If which is "linpred", then the conditional prediction of the
            latent variable is returned. In this case, the return is
            one-dimensional.
        )r0   r   linpredNr:   r   rv   )cumcumprobz`which` is not available)r   _linpredrv   rm   r#   )r/   r|   r0   r   whichr~   xbrt   ru   rv   r   s              r7   predictzOrderedModel.predictw  s    L 0088]]6V]<<II4[SbSkBQRRj2oF??99S#&&DK(((hhsmmGN7888r8   c                 B   || j         }|| j        }n|d}|t          j        |          }|Ot          j        |          }t          j        |          }|                    |d| j        dz
                      }nt          j        | j                  }|||z  }|S )ai  Linear prediction of latent variable `x b + offset`.

        Parameters
        ----------
        params : ndarray
            Parameters for the model, (exog_coef, transformed_thresholds)
        exog : array_like, optional
            Design / exogenous data. Is exog is None, model exog is used.
        offset : array_like, optional
            Offset is added to the linear prediction with coefficient
            equal to 1. If offset is not provided and exog
            is None, uses the model's offset if present.  If not, uses
            0 as the default value.

        Returns
        -------
        linear : ndarray
            1-dim linear prediction given by exog times linear params plus
            offset. This is the prediction for the underlying latent variable.
            If exog and offset are None, then the predicted values are zero.

        Nr   r   )r0   r   r   r   dotr   zerosr+   )r/   r|   r0   r   _exog_paramsr   s          r7   r   zOrderedModel._linpred  s    . <9D~~Z''FJt$$Ej((Gii(=4=1+<)=(= >??GGhty))GvGr8   c                     |                      |          }|| j                 }|| j        dz            }|                     |          }||z
  }||z
  }||fS )a}  Integration bounds for the observation specific interval.

        This defines the lower and upper bounds for the intervals of the
        choices of all observations.

        The bounds for observation are given by

            a_{k_i-1} - linpred_i, a_k_i - linpred_i

        where
        - k_i is the choice in observation i.
        - a_{k_i-1} and a_k_i are thresholds (cutoffs) for choice k_i
        - linpred_i is the linear prediction for observation i

        Parameters
        ----------
        params : ndarray
            Parameters for the model, (exog_coef, transformed_thresholds)

        Return
        ------
        low : ndarray
            Lower bounds for choice intervals of each observation,
            1-dim with length nobs
        upp : ndarray
            Upper bounds for choice intervals of each observation,
            1-dim with length nobs.

        r   )r   r   r   )r/   r|   r~   thresh_i_lowthresh_i_uppr   rt   ru   s           r7   _boundszOrderedModel._bounds  sa    < 0088dj)dj1n-]]6""RRCxr8   c                 P    |                      |                                          S N)
loglikeobssum)r/   r|   s     r7   loglikezOrderedModel.loglike   s"     v&&**,,,r8   c                     |                      |          \  }}|                     ||          }t          j        |dz             S )aY  
        Log-likelihood of OrderdModel for all observations.

        Parameters
        ----------
        params : array_like
            The parameters of the model.

        Returns
        -------
        loglike_obs : array_like
            The log likelihood for each observation of the model evaluated
            at ``params``.
        g#B;)r   rv   r   r   )r/   r|   rt   ru   rv   s        r7   r   zOrderedModel.loglikeobs  s@     <<''Syyc""vdUl###r8   c                 `   |                      |          \  }}|                     ||          }|                     |          }|                     |          }||z
  dddf         }||dddf         z  }t          j        |ddddf          | j        z  |ddddf         f          }|S )zscore, first derivative of loglike for each observations

        This currently only implements the derivative with respect to the
        exog parameters, but not with respect to threshold parameters.

        Nr   )r   rv   rq   r   column_stackr0   )	r/   r|   rt   ru   rv   pdf_upppdf_lowscore_factorsos	            r7   
score_obs_zOrderedModel.score_obs_  s     <<''Syyc""((3--((3--  ')111d73QQQW%_|AAArrE22TY>*111abb513 4 4	r8   c                 f   t          j        | j                  t          | j                  z  }| j                            t          j        |                                dd                    }|                     |          }t          j	        t          j
        | j                  |f          }|S )a  Start parameters for the optimization corresponding to null model.

        The threshold are computed from the observed frequencies and
        transformed to the exponential increments parameterization.
        The parameters for explanatory variables are set to zero.
        r   r   )r   bincountr   rT   r   ppfcliprz   r   rx   r   r)   )r/   freq	start_ppfstart_thresholdstart_paramss        r7   r   zOrderedModel.start_params8  s     {4:&&TZ8JNN274;;==!Q#?#?@@	AA)LL~rx'<'<o&NOOr8   nm  r   r   c           
          t                      j        }	 |	d||||||d|}
t          | |
          }d|_        t	          |          }|S )N)r   methodmaxiterfull_outputdispcallbackr    )r   fitr-   hasconstOrderedResultsWrapper)r/   r   r   r   r   r   r   retallri   
fit_methodmlefit	ordmlefitresultr6   s                r7   r   zOrderedModel.fitG  sw     WW[
 D#)7(3!%D D =CD D
 #400	 	&y11r8   )Nr   r   )NN)NNrv   )Nr   r   r   r   Nr   ) __name__
__module____qualname____doc__r   r{   _formula_max_endogr   r   r'   classmethodrb   r   __func__rm   rq   rv   r   r   r   r   r   r   r   r   r   r   propertyr   r   r   __classcell__)r6   s   @r7   r   r      s       S Sh /, /, /, /, /, /,b6( 6( 6(p/ / / /0      [4 %*$6$>L!! ! ! ! ! ! < < <,  0  ,49 49 49 49l* * * *X% % %N X$,455- - 65-$ $ $&  @   X Xo!)**KL*+     +*    r8   r   c                       e Zd ZdZd Zed             Zed             Zed             Zed             Z	ed             Z
dS )	r-   zResults class for OrderedModel

    This class inherits from GenericLikelihoodModelResults and not all
    inherited methods might be appropriate in this case.
    c                    t          j        | j        j                  }t	          j        | j        j        |d          }t	          j        |                                                     d          |d          }t	          j	        ||
                    t                    dd          j                            d          }|S )z<prediction table

        returns pandas DataFrame

        T)rG   rD   r   F)marginsdropnar   )r   arangerk   r   r@   Categoricalr   r   rc   crosstabastypeintTfillna)r/   rG   observed	predictedtables        r7   
pred_tablezOrderedResults.pred_tableb  s     Ytz233
>$*"2-7G G GN4<<>>#8#8#;#;.8$H H H	I$OOC00$(#(* * * +,FF1II 	 r8   c                 N    | j         j        }| j                             |          S )zS
        Value of the loglikelihood of model without explanatory variables
        )rk   r   r   )r/   params_nulls     r7   llnullzOrderedResults.llnullt  s$    
 j-z!!+...r8   c                 &    d| j         | j        z  z
  S )zC
        McFadden's pseudo-R-squared. `1 - (llf / llnull)`
        r   )llfr   r/   s    r7   	prsquaredzOrderedResults.prsquared}  s    
 48DK'''r8   c                 &    d| j         | j        z
  z  S )zM
        Likelihood ratio chi-squared statistic; `-2*(llnull - llf)`
        )r   r   r   s    r7   llrzOrderedResults.llr  s    
 4;)**r8   c                 j    t           j        j                            | j        | j        j                  S )z
        The chi-squared probability of getting a log-likelihood ratio
        statistic greater than llr.  llr has a chi-squared distribution
        with degrees of freedom `df_model`.
        )r   distributionschi2sfr   rk   r)   r   s    r7   
llr_pvaluezOrderedResults.llr_pvalue  s'     "'**48TZ5FGGGr8   c                     ddl m} | j        j        }|                                 } ||          d         }|t          j        |j        d                   |f         }|S )u"  probability residual

        Probability-scale residual is ``P(Y < y) − P(Y > y)`` where `Y` is the
        observed choice and ``y`` is a random variable corresponding to the
        predicted distribution.

        References
        ----------
        Shepherd BE, Li C, Liu Q (2016) Probability-scale residuals for
        continuous, discrete, and censored data.
        The Canadian Journal of Statistics. 44:463–476.

        Li C and Shepherd BE (2012) A new residual for ordinal outcomes.
        Biometrika. 99: 473–480

        r   )prob_larger_ordinal_choicer   ) statsmodels.stats.diagnostic_genr   rk   r   r   r   r   r%   )r/   r   r   fittedr
resid_probs         r7   r   zOrderedResults.resid_prob  sh    $ 	POOOOO
 &&v..q1ryQ00%78
r8   N)r   r   r   r   r   r
   r   r   r   r   r   r   r8   r7   r-   r-   [  s           $ / / ^/ ( ( ^( + + ^+ H H ^H   ^  r8   r-   c                       e Zd ZdS )r   N)r   r   r   r   r8   r7   r   r     s        Dr8   r   ) r   r>   statsmodels.compat.pandasr   numpyr   pandasr@   pandas.api.typesr   scipyr   statsmodels.base.modelr   r   r   r	   statsmodels.base.wrapperbasewrapperwrap#statsmodels.regression.linear_model
regressionlinear_modellmstatsmodels.tools.decoratorsr
   r   r-   RegressionResultsWrapperr   populate_wrapperr   r8   r7   <module>r      s     . . . . . .         - - - - - -                  ( ' ' ' ' ' ' ' ' 0 0 0 0 0 0 0 0 0 7 7 7 7 7 7{ { { { {) { { {|Q Q Q Q Q2 Q Q Qh	 	 	 	 	B7 	 	 	  +^ < < < < <r8   