"""

Accelerated Failure Time (AFT) Model with empirical likelihood inference.

AFT regression analysis is applicable when the researcher has access
to a randomly right-censored dependent variable, a matrix of exogenous
variables, and an indicator variable (delta) that takes a value of 0 if
the observation is censored and 1 otherwise.

AFT References
--------------

Stute, W. (1993). "Consistent Estimation Under Random Censorship when
Covariables are Present." Journal of Multivariate Analysis, 45(1), 89-103.

EL and AFT References
---------------------

Zhou, Kim and Bathke. "Empirical Likelihood Analysis for the Heteroskedastic
Accelerated Failure Time Model." Manuscript.
URL: www.ms.uky.edu/~mai/research/CasewiseEL20080724.pdf

Zhou, M. (2005). "Empirical Likelihood Ratio with Arbitrarily Censored/
Truncated Data by EM Algorithm." Journal of Computational and Graphical
Statistics, 14(3), 643-656.
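
Examples
--------
A minimal usage sketch; it assumes the ``heart`` dataset, as in the
``test_beta`` examples further below:

>>> import numpy as np
>>> import statsmodels.api as sm
>>> data = sm.datasets.heart.load()
>>> model = sm.emplike.emplikeAFT(np.log10(data.endog),
...                               sm.add_constant(data.exog), data.censors)
>>> res = model.fit()
>>> params = res.params()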


"""
import warnings

import numpy as np
from scipy import optimize
from scipy.stats import chi2

from statsmodels.regression.linear_model import OLS, WLS
from statsmodels.tools import add_constant
from statsmodels.tools.sm_exceptions import IterationLimitWarning

from .descriptive import _OptFuncts


class OptAFT(_OptFuncts):
    """
    Provides optimization functions used in estimating and conducting
    inference in an AFT model.

    Methods
    -------
    _opt_wtd_nuis_regress:
        Function optimized over the nuisance parameters to compute
        the profile likelihood.

    _EM_test:
        Uses the modified EM algorithm of Zhou (2005) to maximize the
        likelihood of a parameter vector.
    """
    def __init__(self):
        pass

    def _opt_wtd_nuis_regress(self, test_vals):
        """
        A function that is optimized over nuisance parameters to conduct a
        hypothesis test for the parameters of interest.

        Parameters
        ----------
        test_vals : 1d array
            The regression coefficients of the model.  This includes both
            the nuisance parameters and the parameters of interest.
        Returns
        -------
        llr : float
            -2 times the log likelihood of the nuisance parameters and the
            hypothesized value of the parameter(s) of interest.
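
        Notes
        -----
        The empirical likelihood is profiled subject to the moment
        condition sum_i w_i * x_i * (y_i - x_i'beta) = 0 over the
        uncensored observations; eta_star is the Lagrange multiplier
        obtained from the modified Newton step.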
        """
        test_params = test_vals.reshape(self.model.nvar, 1)
        est_vect = self.model.uncens_exog * (self.model.uncens_endog -
                                             np.dot(self.model.uncens_exog,
                                                    test_params))
        eta_star = self._modif_newton(np.zeros(self.model.nvar), est_vect,
                                      self.model._fit_weights)
        denom = np.sum(self.model._fit_weights) + np.dot(eta_star, est_vect.T)
        self.new_weights = self.model._fit_weights / denom
        return -1 * np.sum(np.log(self.new_weights))

    def _EM_test(self, nuisance_params, params=None, param_nums=None,
                 b0_vals=None, F=None, survidx=None, uncens_nobs=None,
                 numcensbelow=None, km=None, uncensored=None, censored=None,
                 maxiter=None, ftol=None):
        """
        Uses the EM algorithm to maximize the likelihood for a hypothesis
        test.

        Parameters
        ----------
        nuisance_params : ndarray
            Vector of values to be used as nuisance params.
        maxiter : int
            Number of iterations in the EM algorithm for a parameter vector

        Returns
        -------
        llr : float
            -2 times the log-likelihood ratio at the hypothesized values
            and nuisance params.
        Notes
        -----
        Optional parameters are provided by the test_beta function.
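
        At each iteration, the E step recomputes the weights that
        redistribute the mass of each censored observation to the
        uncensored observations to its right, and the M step updates F by
        maximizing the weighted likelihood via _opt_wtd_nuis_regress
        (see Zhou 2005, section 3).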
        """
        iters = 0
        params[param_nums] = b0_vals

        nuis_param_index = np.int_(np.delete(np.arange(self.model.nvar),
                                             param_nums))
        params[nuis_param_index] = nuisance_params
        to_test = params.reshape(self.model.nvar, 1)
        opt_res = np.inf
        diff = np.inf
        while iters < maxiter and diff > ftol:
            F = F.flatten()
            death = np.cumsum(F[::-1])
            survivalprob = death[::-1]
            surv_point_mat = np.dot(F.reshape(-1, 1),
                                    1. / survivalprob[survidx].reshape(1, -1))
            surv_point_mat = add_constant(surv_point_mat)
            summed_wts = np.cumsum(surv_point_mat, axis=1)
            wts = summed_wts[np.int_(np.arange(uncens_nobs)),
                             numcensbelow[uncensored]]
            # ^ E step; see Zhou (2005), section 3
            self.model._fit_weights = wts
            # Uncensored weights' contribution to the likelihood value
            new_opt_res = self._opt_wtd_nuis_regress(to_test)
            # M step: update the estimated distribution F
            F = self.new_weights
            diff = np.abs(new_opt_res - opt_res)
            opt_res = new_opt_res
            iters = iters + 1
        death = np.cumsum(F.flatten()[::-1])
        survivalprob = death[::-1]
        llike = -opt_res + np.sum(np.log(survivalprob[survidx]))
        wtd_km = km.flatten() / np.sum(km)
        survivalmax = np.cumsum(wtd_km[::-1])[::-1]
        llikemax = np.sum(np.log(wtd_km[uncensored])) + \
                   np.sum(np.log(survivalmax[censored]))
        if iters == maxiter:
            warnings.warn('The EM reached the maximum number of iterations',
                          IterationLimitWarning)
        return -2 * (llike - llikemax)

    def _ci_limits_beta(self, b0, param_num=None):
        """
        Returns the difference between the log-likelihood ratio for a
        parameter and a critical value.

        Parameters
        ----------
        b0 : float
            Value of a regression parameter
        param_num : int
            Parameter index of b0
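
        Returns
        -------
        diff : float
            The difference between the log-likelihood ratio at b0 and the
            critical value self.r0 set by ci_beta; the confidence limits
            are the roots of this function.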
        """
        return self.test_beta([b0], [param_num])[0] - self.r0


class emplikeAFT:
    """

    Class for estimating and conducting inference in an AFT model.

    Parameters
    ----------

    endog: nx1 array
        Response variables that are subject to random censoring

    exog: nxk array
        Matrix of covariates

    censors: nx1 array
        array with entries 0 or 1.  0 indicates a response was
        censored.

    Attributes
    ----------
    nobs : float
        Number of observations
    endog : ndarray
        Endog attay
    exog : ndarray
        Exogenous variable matrix
    censors
        Censors array but sets the max(endog) to uncensored
    nvar : float
        Number of exogenous variables
    uncens_nobs : float
        Number of uncensored observations
    uncens_endog : ndarray
        Uncensored response variables
    uncens_exog : ndarray
        Exogenous variables of the uncensored observations

    Methods
    -------

    params:
        Fits model parameters

    test_beta:
        Tests if beta = b0 for any vector b0.

    Notes
    -----

    The data is immediately sorted in order of increasing endogenous
    variables

    The last observation is assumed to be uncensored which makes
    estimation and inference possible.
    """
    def __init__(self, endog, exog, censors):
        self.nobs = np.shape(exog)[0]
        self.endog = endog.reshape(self.nobs, 1)
        self.exog = exog.reshape(self.nobs, -1)
        self.censors = np.asarray(censors).reshape(self.nobs, 1)
        self.nvar = self.exog.shape[1]
        idx = np.lexsort((-self.censors[:, 0], self.endog[:, 0]))
        self.endog = self.endog[idx]
        self.exog = self.exog[idx]
        self.censors = self.censors[idx]
        self.censors[-1] = 1  # Largest observation is treated as uncensored
        self.uncens_nobs = int(np.sum(self.censors))
        mask = self.censors.ravel().astype(bool)
        self.uncens_endog = self.endog[mask, :].reshape(-1, 1)
        self.uncens_exog = self.exog[mask, :]
    def _is_tied(self, endog, censors):
        """
        Indicates whether an observation takes the same value as the next
        ordered observation.

        Parameters
        ----------
        endog : ndarray
            Model's endogenous variable
        censors : ndarray
            Array indicating censoring status

        Returns
        -------
        indic_ties : ndarray
            indic_ties[i] = 1 if endog[i] == endog[i + 1] and
            censors[i] == censors[i + 1]
        """
        endog_idx = endog[:-1] == endog[1:]
        censors_idx = censors[:-1] == censors[1:]
        indic_ties = endog_idx * censors_idx  # Both conditions must hold
        return np.int_(indic_ties)

    def _km_w_ties(self, tie_indic, untied_km):
        """
        Computes the KM estimator value at each observation, taking into
        account ties in the data.

        Parameters
        ----------
        tie_indic : 1d array
            Indicates if the ith observation takes the same value as the
            (i+1)th
        untied_km : 1d array
            KM estimates at each observation assuming no ties.
        """
        # TODO: Vectorize, even though it is only 1 pass through for any
        # function call
        num_same = 1
        idx_nums = []
        for obs_num in np.arange(int(self.nobs - 1))[::-1]:
            if tie_indic[obs_num] == 1:
                idx_nums.append(obs_num)
                num_same = num_same + 1
                # A tied observation takes the KM value of the next one
                untied_km[obs_num] = untied_km[obs_num + 1]
            elif tie_indic[obs_num] == 0 and num_same > 1:
                # End of a run of ties; reset the bookkeeping
                num_same = 1
                idx_nums = []
        return untied_km.reshape(self.nobs, 1)

    def _make_km(self, endog, censors):
        """

        Computes the Kaplan-Meier estimate for the weights in the AFT model

        Parameters
        ----------
        endog: nx1 array
            Array of response variables
        censors: nx1 array
            Censor-indicating variable

        Returns
        -------
        Kaplan Meier estimate for each observation

        Notes
        -----

        This function makes calls to _is_tied and km_w_ties to handle ties in
        the data.If a censored observation and an uncensored observation has
        the same value, it is assumed that the uncensored happened first.
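
        In the absence of ties, the value built for the i-th ordered
        observation (1-based, with censoring indicator delta_i) is

            km_i = prod_{j <= i} ((nobs - j) / (nobs - j + 1)) ** (1 - delta_j)

        which is what the cumprod below computes; the function then returns
        delta_i / km_i, so censored observations receive weight 0.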
        """
        nobs = self.nobs
        num = (nobs - (np.arange(nobs) + 1.))
        denom = (nobs - (np.arange(nobs) + 1.) + 1.)
        km = (num / denom).reshape(nobs, 1)
        km = km ** np.abs(censors - 1.)
        km = np.cumprod(km)  # If there are no ties, this is the KM estimator
        tied = self._is_tied(endog, censors)
        wtd_km = self._km_w_ties(tied, km)
        return (censors / wtd_km).reshape(nobs, 1)

    def fit(self):
        """

        Fits an AFT model and returns results instance

        Parameters
        ----------
        None


        Returns
        -------
        Results instance.

        Notes
        -----
        To avoid dividing by zero, max(endog) is assumed to be uncensored.
        """
        return AFTResults(self)

    def predict(self, params, exog=None):
        """
        Returns the linear predictions np.dot(exog, params).
        """
        if exog is None:
            exog = self.exog
        return np.dot(exog, params)


class AFTResults(OptAFT):
    """
    Results class for the emplikeAFT model.
    """
    def __init__(self, model):
        self.model = model

    def params(self):
        """

        Fits an AFT model and returns parameters.

        Parameters
        ----------
        None


        Returns
        -------
        Fitted params

        Notes
        -----
        To avoid dividing by zero, max(endog) is assumed to be uncensored.
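
        The estimator follows Stute (1993): weighted least squares of
        endog on exog, with weights derived from the Kaplan-Meier
        estimator via _make_km.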
        """
        self.model.modif_censors = np.copy(self.model.censors)
        self.model.modif_censors[-1] = 1
        wts = self.model._make_km(self.model.endog, self.model.modif_censors)
        res = WLS(self.model.endog, self.model.exog, wts).fit()
        params = res.params
        return params

    def test_beta(self, b0_vals, param_nums, ftol=1e-5, maxiter=30,
                  print_weights=False):
        """
        Returns the profile log-likelihood for regression parameters
        'param_nums' at 'b0_vals.'

        Parameters
        ----------
        b0_vals : list
            The values of the parameters to be tested
        param_nums : list
            Which parameters to test
        maxiter : int, optional
            How many iterations to use in the EM algorithm.  Default is 30
        ftol : float, optional
            The function tolerance for the EM optimization.
            Default is 1e-5
        print_weights : bool
            If true, returns the weights that maximize the profile
            log-likelihood.  Default is False

        Returns
        -------
        test_results : tuple
            The log-likelihood ratio and p-value of the test.

        Notes
        -----
        The function will warn if the EM reaches the maxiter.  However, when
        optimizing over nuisance parameters, it is possible to reach the
        maximum number of inner iterations for a specific value of the
        nuisance parameters while the results of the function are still
        valid.  This usually occurs when the optimization over the nuisance
        parameters selects parameter values that yield a log-likelihood
        ratio close to infinity.

        Examples
        --------
        >>> import statsmodels.api as sm
        >>> import numpy as np

        # Test that the sole parameter is 0 in a one-regressor,
        # no-intercept model
        >>> data = sm.datasets.heart.load()
        >>> y = np.log10(data.endog)
        >>> x = data.exog
        >>> cens = data.censors
        >>> model = sm.emplike.emplikeAFT(y, x, cens)
        >>> res = model.test_beta([0], [0])
        >>> res
        (1.4657739632606308, 0.22601365256959183)

        # Test that the slope is 0 in a model with an intercept
        >>> data = sm.datasets.heart.load()
        >>> y = np.log10(data.endog)
        >>> x = data.exog
        >>> cens = data.censors
        >>> model = sm.emplike.emplikeAFT(y, sm.add_constant(x), cens)
        >>> res = model.test_beta([0], [1])
        >>> res
        (4.623487775078047, 0.031537049752572731)
        """
        censors = self.model.censors
        endog = self.model.endog
        exog = self.model.exog
        uncensored = (censors == 1).flatten()
        censored = (censors == 0).flatten()
        uncens_endog = endog[uncensored]
        uncens_exog = exog[uncensored, :]
        reg_model = OLS(uncens_endog, uncens_exog).fit()
        llr, pval, new_weights = reg_model.el_test(b0_vals, param_nums,
                                                   return_weights=True)
        km = self.model._make_km(endog, censors).flatten()
        uncens_nobs = self.model.uncens_nobs
        F = np.asarray(new_weights).reshape(uncens_nobs)
        # Step 0: initial estimate of the jumps F from the uncensored EL fit
        params = self.params()
        survidx = np.where(censors == 0)
        survidx = survidx[0] - np.arange(len(survidx[0]))
        numcensbelow = np.int_(np.cumsum(1 - censors))
        if len(param_nums) == len(params):
            llr = self._EM_test([], F=F, params=params,
                                param_nums=param_nums, b0_vals=b0_vals,
                                survidx=survidx, uncens_nobs=uncens_nobs,
                                numcensbelow=numcensbelow, km=km,
                                uncensored=uncensored, censored=censored,
                                ftol=ftol, maxiter=maxiter)
            return llr, chi2.sf(llr, self.model.nvar)
        else:
            x0 = np.delete(params, param_nums)
            try:
                res = optimize.fmin(self._EM_test, x0,
                                    (params, param_nums, b0_vals, F, survidx,
                                     uncens_nobs, numcensbelow, km, uncensored,
                                     censored, maxiter, ftol),
                                    full_output=1, disp=0)

                llr = res[1]
                return llr, chi2.sf(llr, len(param_nums))
            except np.linalg.LinAlgError:
                return np.inf, 0

    def ci_beta(self, param_num, beta_high, beta_low, sig=.05):
        """
        Returns the confidence interval for a regression
        parameter in the AFT model.

        Parameters
        ----------
        param_num : int
            Parameter number of interest
        beta_high : float
            Upper bound for the confidence interval
        beta_low : float
            Lower bound for the confidence interval
        sig : float, optional
            Significance level.  Default is .05

        Returns
        -------
        ci : tuple
            The lower and upper confidence limits

        Notes
        -----
        If the optimizer raises the error "f(a) and f(b) must have
        different signs", consider widening the search area by adjusting
        beta_low and beta_high.

        Also note that this process is computationally intensive.  There
        are 4 levels of optimization/solving.  From outer to inner:

        1) Solving so that llr - critical value = 0
        2) Maximizing over nuisance parameters
        3) Using EM at each value of nuisance parameters
        4) Using the _modif_newton optimizer at each iteration
           of the EM algorithm.

        Also, for very unlikely nuisance parameters, it is possible for
        the EM algorithm to not converge.  This is not an indicator
        that the solver did not find the correct solution.  It just means
        that for a specific iteration of the nuisance parameters, the
        optimizer was unable to converge.

        If the user desires to verify the success of the optimization,
        it is recommended to test the limits using test_beta.
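
        Examples
        --------
        A hedged usage sketch; the bracketing bounds .4 and -.4 are
        illustrative guesses and may need widening so that the limits are
        actually bracketed:

        >>> import numpy as np
        >>> import statsmodels.api as sm
        >>> data = sm.datasets.heart.load()
        >>> model = sm.emplike.emplikeAFT(np.log10(data.endog),
        ...                               sm.add_constant(data.exog),
        ...                               data.censors)
        >>> ci = model.fit().ci_beta(1, .4, -.4)  # doctest: +SKIP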
        """
        params = self.params()
        self.r0 = chi2.ppf(1 - sig, 1)
        ll = optimize.brentq(self._ci_limits_beta, beta_low,
                             params[param_num], (param_num,))
        ul = optimize.brentq(self._ci_limits_beta,
                             params[param_num], beta_high, (param_num,))
        return ll, ul
