"""
Created on Thu Feb 11 09:19:30 2021

Author: Josef Perktold
License: BSD-3

"""
import warnings

import numpy as np
from scipy import interpolate, stats

# helper functions to work on a grid of cdf and pdf, histogram

class _Grid:
    """Create Grid values and indices, grid in [0, 1]^d

    This class creates a regular grid in a d dimensional hyper cube.

    Intended for internal use, implementation might change without warning.


    Parameters
    ----------
    k_grid : tuple or array_like
        Number of elements for each axis; this defines k_grid - 1 equal-sized
        intervals of [0, 1] along each axis.
    eps : float
        If eps is not zero, then x values will be clipped to [eps, 1 - eps],
        i.e. to the interior of the unit interval or hyper cube.


    Attributes
    ----------
    k_grid : list of number of grid points
    x_marginal : list of 1-dimensional marginal values
    idx_flat : integer array with indices
    x_flat : flattened grid values,
        rows are grid points, columns represent variables or axes.
        ``x_flat`` is currently also 2-dim in the univariate 1-dim grid case.
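
    Examples
    --------
    Illustrative sketch of the grid layout (output is indicative):

    >>> g = _Grid([3, 3])
    >>> g.x_flat.shape
    (9, 2)
    >>> g.x_marginal[0]
    array([0. , 0.5, 1. ])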

    """

    def __init__(self, k_grid, eps=0):
        self.k_grid = k_grid

        x_marginal = [np.arange(ki) / (ki - 1) for ki in k_grid]

        idx_flat = np.column_stack(
                np.unravel_index(np.arange(np.prod(k_grid)), k_grid)
                ).astype(float)
        x_flat = idx_flat / idx_flat.max(0)
        if eps != 0:
            x_marginal = [np.clip(xi, eps, 1 - eps) for xi in x_marginal]
            x_flat = np.clip(x_flat, eps, 1 - eps)

        self.x_marginal = x_marginal
        self.idx_flat = idx_flat
        self.x_flat = x_flat


def prob2cdf_grid(probs):
    """Cumulative probabilities from cell provabilites on a grid

    Parameters
    ----------
    probs : array_like
        Rectangular grid of cell probabilities.

    Returns
    -------
    cdf : ndarray
        Grid of cumulative probabilities with same shape as probs.
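
    Examples
    --------
    Minimal illustration on a 2x2 grid of cell probabilities (output is
    indicative):

    >>> probs = np.full((2, 2), 0.25)
    >>> prob2cdf_grid(probs)
    array([[0.25, 0.5 ],
           [0.5 , 1.  ]])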
    """
    cdf = np.asarray(probs).copy()
    k = cdf.ndim
    for i in range(k):
        cdf = cdf.cumsum(axis=i)

    return cdf


def cdf2prob_grid(cdf, prepend=0):
    """Cell probabilities from cumulative probabilities on a grid.

    Parameters
    ----------
    cdf : array_like
        Grid of cumulative probabilities.
    prepend : float or None
        Value prepended along each axis before differencing. With the default
        of 0, the output has the same shape as ``cdf``. If None, nothing is
        prepended and each axis is shortened by one element.

    Returns
    -------
    probs : ndarray
        Rectangular grid of cell probabilities.
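
    Examples
    --------
    Sketch of the round trip with ``prob2cdf_grid`` (output is indicative):

    >>> cdf = prob2cdf_grid(np.full((2, 2), 0.25))
    >>> cdf2prob_grid(cdf, prepend=0)
    array([[0.25, 0.25],
           [0.25, 0.25]])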

    """
    if prepend is None:
        prepend = np._NoValue
    prob = np.asarray(cdf).copy()
    k = prob.ndim
    for i in range(k):
        prob = np.diff(prob, prepend=prepend, axis=i)

    return prob


def average_grid(values, coords=None, _method="slicing"):
    """Compute average for each cell in grid using endpoints

    Parameters
    ----------
    values : array_like
        Values on a grid that will be averaged over the corner points of each
        cell.
    coords : None or list of array_like
        Grid coordinates for each axis used to compute the volume of each
        cell. If None, then averaged values are not rescaled.
    _method : {"slicing", "convolve"}
        Grid averaging is implemented using numpy "slicing" or using
        scipy.signal "convolve".

    Returns
    -------
    ndarray
        Grid with averaged cell values.
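
    Examples
    --------
    Illustrative sketch on a one-dimensional grid:

    >>> vals = np.array([0., 1., 2.])
    >>> average_grid(vals)
    array([0.5, 1.5])
    >>> average_grid(vals, coords=[np.array([0., 0.5, 1.])])
    array([0.25, 0.75])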
    """
    k_dim = values.ndim
    if _method == "slicing":
        p = values.copy()

        for d in range(k_dim):
            # average (p[:-1] + p[1:]) / 2 over each axis
            sl1 = [slice(None, None, None)] * k_dim
            sl2 = [slice(None, None, None)] * k_dim
            sl1[d] = slice(None, -1, None)
            sl2[d] = slice(1, None, None)
            sl1 = tuple(sl1)
            sl2 = tuple(sl2)

            p = (p[sl1] + p[sl2]) / 2

    elif _method == "convolve":
        from scipy import signal
        p = signal.convolve(values, 0.5**k_dim * np.ones([2] * k_dim),
                            mode="valid")

    if coords is not None:
        dx = np.array(1)
        for d in range(k_dim):
            dx = dx[..., None] * np.diff(coords[d])

        p = p * dx

    return p


def nearest_matrix_margins(mat, maxiter=100, tol=1e-8):
    """nearest matrix with uniform margins

    Parameters
    ----------
    mat : array_like, 2-D
        Matrix that will be converted to have uniform margins.
        Currently, `mat` has to be two dimensional.
    maxiter : int
        Maximum number of iterations.
    tol : float
        Tolerance for convergence, defined for difference between largest and
        smallest margin in each dimension.

    Returns
    -------
    ndarray
        Nearest matrix with uniform margins.

    Notes
    -----
    This function is intended for internal use and will be generalized in
    future. API will change.

    Changed in version 0.14 to support k_dim > 2.
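
    Examples
    --------
    Illustrative sketch; the returned matrix sums to one and has
    (approximately) uniform margins:

    >>> m = np.array([[2., 1.], [1., 2.]])
    >>> pc = nearest_matrix_margins(m)
    >>> np.allclose(pc.sum(1), 0.5)
    True
    >>> np.allclose(pc.sum(), 1)
    True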


    """
    pc = np.asarray(mat)
    converged = False

    for _ in range(maxiter):
        pc0 = pc.copy()
        for ax in range(pc.ndim):
            axs = tuple([i for i in range(pc.ndim) if not i == ax])
            pc0 /= pc.sum(axis=axs, keepdims=True)
        pc = pc0
        pc /= pc.sum()

        # check convergence
        mptps = []
        for ax in range(pc.ndim):
            axs = tuple([i for i in range(pc.ndim) if not i == ax])
            marg = pc.sum(axis=axs, keepdims=False)
            mptps.append(np.ptp(marg))
        if max(mptps) < tol:
            converged = True
            break

    if not converged:
        from statsmodels.tools.sm_exceptions import ConvergenceWarning
        warnings.warn("Iterations did not converge, maxiter reached",
                      ConvergenceWarning)
    return pc


def _rankdata_no_ties(x):
    """rankdata without ties for 2-d array

    This is a simplified version for ranking data if there are no ties.
    Works vectorized across columns.

    See Also
    --------
    scipy.stats.rankdata
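
    Examples
    --------
    Illustration on a small 2-d array without ties (output is indicative):

    >>> x = np.array([[0.3, 2.0], [0.1, 1.0], [0.2, 3.0]])
    >>> _rankdata_no_ties(x)
    array([[3., 2.],
           [1., 1.],
           [2., 3.]])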

    """
    nobs, k_vars = x.shape
    ranks = np.ones((nobs, k_vars))
    sidx = np.argsort(x, axis=0)
    ranks[sidx, np.arange(k_vars)] = np.arange(1, nobs + 1)[:, None]
    return ranks


def frequencies_fromdata(data, k_bins, use_ranks=True):
    """count of observations in bins (histogram)

    currently only for bivariate data

    Parameters
    ----------
    data : array_like
        Bivariate data with observations in rows and two columns. Binning is
        in the unit rectangle [0, 1]^2. If use_ranks is False, then data
        should be in the unit interval.
    k_bins : int
        Number of bins along each dimension in the histogram
    use_ranks : bool
        If use_ranks is True, then data will be converted to ranks without
        tie handling.

    Returns
    -------
    bin counts : ndarray
        Frequencies are the number of observations in a given bin.
        Bin counts are a 2-dim array with k_bins rows and k_bins columns.

    Notes
    -----
    This function is intended for internal use and will be generalized in
    future. API will change.
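
    Examples
    --------
    Illustrative sketch with a small simulated bivariate sample (counts
    depend on the data):

    >>> rng = np.random.default_rng(1234)
    >>> data = rng.uniform(size=(100, 2))
    >>> freq = frequencies_fromdata(data, k_bins=5, use_ranks=True)
    >>> freq.shape
    (5, 5)
    >>> int(freq.sum())
    100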
    """
    data = np.asarray(data)
    k_dim = data.shape[-1]
    k = k_bins + 1
    g2 = _Grid([k] * k_dim, eps=0)
    if use_ranks:
        data = _rankdata_no_ties(data) / (data.shape[0] + 1)
        # alternatives: scipy handles ties, but uses np.apply_along_axis
        # rvs = stats.rankdata(rvs, axis=0) / (rvs.shape[0] + 1)
        # rvs = (np.argsort(np.argsort(rvs, axis=0), axis=0) + 1
        #                              ) / (rvs.shape[0] + 1)
    freqr, _ = np.histogramdd(data, bins=g2.x_marginal)
    return freqr


def approx_copula_pdf(copula, k_bins=10, force_uniform=True, use_pdf=False):
    """Histogram probabilities as approximation to a copula density.

    Parameters
    ----------
    copula : instance
        Instance of a copula class. The ``k_dim`` attribute and the ``cdf``
        method are used by default; the ``pdf`` method is used instead if
        ``use_pdf`` is true.
    k_bins : int
        Number of bins along each dimension in the approximating histogram.
    force_uniform : bool
        If true, then the pdf grid will be adjusted to have uniform margins
        using `nearest_matrix_margins`.
        If false, then no adjustment is done and the margins may not be exactly
        uniform.
    use_pdf : bool
        If false, then the grid cell probabilities will be computed from the
        copula cdf.
        If true, then the density, ``pdf``, is used and cell probabilities
        are approximated by averaging the pdf of the cell corners. This is
        only useful if the cdf is not available.

    Returns
    -------
    bin probabilities : ndarray
        Probability that the random variable falls in a given bin. This
        corresponds to a discrete distribution, and is not scaled by bin size
        to form a piecewise uniform histogram density.
        Bin probabilities are a k_dim-dimensional array with k_bins segments
        in each dimension.

    Notes
    -----
    This function is intended for internal use and will be generalized in
    future. API will change.
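
    Examples
    --------
    Illustrative sketch, assuming ``FrankCopula`` from
    ``statsmodels.distributions.copula.api`` is available:

    >>> from statsmodels.distributions.copula.api import FrankCopula
    >>> cop = FrankCopula(theta=2)  # assumed copula instance for illustration
    >>> probs = approx_copula_pdf(cop, k_bins=10)
    >>> probs.shape
    (10, 10)
    >>> float(np.round(probs.sum(), 6))
    1.0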
    """
    k_dim = copula.k_dim
    k = k_bins + 1
    ks = tuple([k] * k_dim)

    if use_pdf:
        g = _Grid([k] * k_dim, eps=0.1 / k_bins)
        pdfg = copula.pdf(g.x_flat).reshape(*ks)
        # correct for bin size
        pdfg *= 1 / k**k_dim
        ag = average_grid(pdfg)
        if force_uniform:
            pdf_grid = nearest_matrix_margins(ag, maxiter=100, tol=1e-8)
        else:
            pdf_grid = ag / ag.sum()
    else:
        g = _Grid([k] * k_dim, eps=1e-6)
        cdfg = copula.cdf(g.x_flat).reshape(*ks)
        # correct for bin size
        pdf_grid = cdf2prob_grid(cdfg, prepend=None)
        # TODO: check boundary approximation, eg. undefined at zero
        # for now just normalize
        pdf_grid /= pdf_grid.sum()

    return pdf_grid


# functions to evaluate bernstein polynomials

def _eval_bernstein_1d(x, fvals, method="binom"):
    """Evaluate 1-dimensional bernstein polynomial given grid of values.

    experimental, comparing methods

    Parameters
    ----------
    x : array_like
        Values at which to evaluate the Bernstein polynomial.
    fvals : ndarray
        Grid values of coefficients for Bernstein polynomial basis in the
        weighted sum.
    method: "binom", "beta" or "bpoly"
        Method to construct Bernstein polynomial basis, used for comparison
        of parameterizations.

        - "binom" uses pmf of Binomial distribution
        - "beta" uses pdf of Beta distribution
        - "bpoly" uses one interval in scipy.interpolate.BPoly

    Returns
    -------
    Bernstein polynomial at evaluation points, weighted sum of Bernstein
    polynomial basis.
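
    Examples
    --------
    Illustrative sketch: with coefficients equal to the grid points, the
    Bernstein polynomial reproduces the identity on [0, 1]:

    >>> fvals = np.linspace(0, 1, 5)
    >>> x = np.array([0.1, 0.5, 0.9])
    >>> np.allclose(_eval_bernstein_1d(x, fvals), x)
    True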
    """
    k_terms = fvals.shape[-1]
    xx = np.asarray(x)
    k = np.arange(k_terms).astype(float)
    n = k_terms - 1.

    if method.lower() == "binom":
        # Divide by 0 RuntimeWarning here
        with warnings.catch_warnings():
            warnings.simplefilter("ignore", RuntimeWarning)
            poly_base = stats.binom.pmf(k, n, xx[..., None])
        bp_values = (fvals * poly_base).sum(-1)
    elif method.lower() == "bpoly":
        bpb = interpolate.BPoly(fvals[:, None], [0., 1])
        bp_values = bpb(x)
    elif method.lower() == "beta":
        # Divide by 0 RuntimeWarning here
        with warnings.catch_warnings():
            warnings.simplefilter("ignore", RuntimeWarning)
            poly_base = stats.beta.pdf(xx[..., None], k + 1, n - k + 1) / (n + 1)
        bp_values = (fvals * poly_base).sum(-1)
    else:
        raise ValueError("method not recogized")

    return bp_values


def _eval_bernstein_2d(x, fvals):
    """Evaluate 2-dimensional bernstein polynomial given grid of values

    experimental

    Parameters
    ----------
    x : array_like
        Values at which to evaluate the Bernstein polynomial.
    fvals : ndarray
        Grid values of coefficients for Bernstein polynomial basis in the
        weighted sum.

    Returns
    -------
    Bernstein polynomial at evaluation points, weighted sum of Bernstein
    polynomial basis.
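
    Examples
    --------
    Illustrative sketch: a product of linear margins is reproduced exactly:

    >>> xg = np.linspace(0, 1, 4)
    >>> fvals = xg[:, None] * xg[None, :]  # f(u, v) = u * v on the grid
    >>> x = np.array([[0.25, 0.5], [0.5, 0.75]])
    >>> np.allclose(_eval_bernstein_2d(x, fvals), x[:, 0] * x[:, 1])
    True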
    """
    k_terms = fvals.shape
    k_dim = fvals.ndim
    if k_dim != 2:
        raise ValueError("`fval` needs to be 2-dimensional")
    xx = np.atleast_2d(x)
    if xx.shape[1] != 2:
        raise ValueError("x needs to be bivariate and have 2 columns")

    x1, x2 = xx.T
    n1, n2 = k_terms[0] - 1, k_terms[1] - 1
    k1 = np.arange(k_terms[0]).astype(float)
    k2 = np.arange(k_terms[1]).astype(float)

    # we are building a (nobs, k_terms[0], k_terms[1]) array of basis products
    poly_base = (stats.binom.pmf(k1[None, :, None], n1, x1[:, None, None]) *
                 stats.binom.pmf(k2[None, None, :], n2, x2[:, None, None]))
    bp_values = (fvals * poly_base).sum(-1).sum(-1)

    return bp_values


def _eval_bernstein_dd(x, fvals):
    """Evaluate d-dimensional bernstein polynomial given grid of valuesv

    experimental

    Parameters
    ----------
    x : array_like
        Values at which to evaluate the Bernstein polynomial.
    fvals : ndarray
        Grid values of coefficients for Bernstein polynomial basis in the
        weighted sum.

    Returns
    -------
    Bernstein polynomial at evaluation points, weighted sum of Bernstein
    polynomial basis.
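
    Examples
    --------
    Illustrative sketch with a 3-dimensional grid of a product of linear
    margins, which the Bernstein polynomial reproduces exactly:

    >>> xg = np.linspace(0, 1, 4)
    >>> fvals = xg[:, None, None] * xg[None, :, None] * xg[None, None, :]
    >>> x = np.array([[0.2, 0.5, 0.8], [0.5, 0.5, 0.5]])
    >>> np.allclose(_eval_bernstein_dd(x, fvals), x.prod(1))
    True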
    """
    k_terms = fvals.shape
    k_dim = fvals.ndim
    xx = np.atleast_2d(x)

    # The following loop is tricky:
    # we add terms for each x and expand the dimensions of the poly base in
    # each iteration using broadcasting

    poly_base = np.zeros(x.shape[0])
    for i in range(k_dim):
        ki = np.arange(k_terms[i]).astype(float)
        for _ in range(i+1):
            ki = ki[..., None]
        ni = k_terms[i] - 1
        xi = xx[:, i]
        poly_base = poly_base[None, ...] + stats.binom._logpmf(ki, ni, xi)

    poly_base = np.exp(poly_base)
    bp_values = fvals.T[..., None] * poly_base

    for i in range(k_dim):
        bp_values = bp_values.sum(0)

    return bp_values


def _ecdf_mv(data, method="seq", use_ranks=True):
    """
    Multivariate empirical distribution function, empirical copula


    Notes
    -----
    Method "seq" is faster than method "brute", but supports mainly bivariate
    case. Speed advantage of "seq" is increasing in number of observations
    and decreasing in number of variables.
    (see Segers ...)

    Warning: This does not handle ties. The ecdf is based on univariate ranks
    without ties. The assignment of ranks to ties depends on the sorting
    algorithm and the initial ordering of the data.

    When the original data is used instead of ranks, then method "brute"
    computes the correct ecdf counts even in the case of ties.
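
    Examples
    --------
    Illustrative sketch; each count is the number of observations that are
    componentwise smaller or equal:

    >>> data = np.array([[0.1, 0.2], [0.5, 0.6], [0.9, 0.4]])
    >>> count, ranks = _ecdf_mv(data, method="brute", use_ranks=True)
    >>> count
    array([1, 2, 2])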

    """
    x = np.asarray(data)
    n = x.shape[0]
    if use_ranks:
        x = _rankdata_no_ties(x) / n
    if method == "brute":
        count = [((x <= x[i]).all(1)).sum() for i in range(n)]
        count = np.asarray(count)
    elif method.startswith("seq"):
        sort_idx0 = np.argsort(x[:, 0])
        x_s0 = x[sort_idx0]
        x1 = x_s0[:, 1:]
        count_smaller = [(x1[:i] <= x1[i]).all(1).sum() + 1 for i in range(n)]
        count = np.empty(x.shape[0])
        count[sort_idx0] = count_smaller
    else:
        raise ValueError("method not available")

    return count, x
