"""Tests and descriptive statistics with weights


Created on 2010-09-18

Author: josef-pktd
License: BSD (3-clause)


References
----------
SPSS manual
SAS manual

This follows in large parts the SPSS manual, which is largely the same as
the SAS manual with different, simpler notation.

Freq, Weight in SAS seems redundant since they always show up as product, SPSS
has only weights.

Notes
-----

This has potential problems with ddof, I started to follow numpy with ddof=0
by default and users can change it, but this might still mess up the t-tests,
since the estimates for the standard deviation will be based on the ddof that
the user chooses.
- fixed ddof for the meandiff ttest, now matches scipy.stats.ttest_ind

Note: scipy has now a separate, pooled variance option in ttest, but I have not
compared yet.

"""

import numpy as np
from scipy import stats

from statsmodels.tools.decorators import cache_readonly


class DescrStatsW:
    """
    Descriptive statistics and tests with weights for case weights

    Assumes that the data is 1d or 2d with (nobs, nvars) observations in rows,
    variables in columns, and that the same weight applies to each column.

    If degrees of freedom correction is used, then weights should add up to the
    number of observations. ttest also assumes that the sum of weights
    corresponds to the sample size.

    This is essentially the same as replicating each observation by its
    weight, if the weights are integers, often called case or frequency weights.

    Parameters
    ----------
    data : array_like, 1-D or 2-D
        dataset
    weights : None or 1-D ndarray
        weights for each observation, with same length as zero axis of data.
        If None, every observation gets weight 1.
    ddof : int
        default ddof=0, degrees of freedom correction used for second moments,
        var, std, cov, corrcoef.
        However, statistical tests are independent of `ddof`, based on the
        standard formulas.

    Examples
    --------

    >>> import numpy as np
    >>> np.random.seed(0)
    >>> x1_2d = 1.0 + np.random.randn(20, 3)
    >>> w1 = np.random.randint(1, 4, 20)
    >>> d1 = DescrStatsW(x1_2d, weights=w1)
    >>> d1.mean
    array([ 1.42739844,  1.23174284,  1.083753  ])
    >>> d1.var
    array([ 0.94855633,  0.52074626,  1.12309325])
    >>> d1.std_mean
    array([ 0.14682676,  0.10878944,  0.15976497])

    >>> tstat, pval, df = d1.ttest_mean(0)
    >>> tstat; pval; df
    array([  9.72165021,  11.32226471,   6.78342055])
    array([  1.58414212e-12,   1.26536887e-14,   2.37623126e-08])
    44.0

    >>> tstat, pval, df = d1.ttest_mean([0, 1, 1])
    >>> tstat; pval; df
    array([ 9.72165021,  2.13019609,  0.52422632])
    array([  1.58414212e-12,   3.87842808e-02,   6.02752170e-01])
    44.0

    # if weights are integers, then asrepeats can be used

    >>> x1r = d1.asrepeats()
    >>> x1r.shape
    ...
    >>> stats.ttest_1samp(x1r, [0, 1, 1])
    ...

    """

    def __init__(self, data, weights=None, ddof=0):

        self.data = np.asarray(data)
        if weights is None:
            # unweighted case: one unit of weight per row of data
            self.weights = np.ones(self.data.shape[0])
        else:
            self.weights = np.asarray(weights).astype(float)
            # TODO: why squeeze?
            # Weights given with more than one dimension (and more than one
            # entry along the first axis) are squeezed to drop length-1 axes.
            if len(self.weights.shape) > 1 and len(self.weights) > 1:
                self.weights = self.weights.squeeze()
        self.ddof = ddof

    @cache_readonly
    def sum_weights(self):
        """Sum of weights"""
        return self.weights.sum(0)

    @cache_readonly
    def nobs(self):
        """alias for number of observations/cases, equal to sum of weights
        """
        return self.sum_weights

    @cache_readonly
    def sum(self):
        """weighted sum of data"""
        return np.dot(self.data.T, self.weights)

    @cache_readonly
    def mean(self):
        """weighted mean of data"""
        return self.sum / self.sum_weights

    @cache_readonly
    def demeaned(self):
        """data with weighted mean subtracted"""
        return self.data - self.mean

    @cache_readonly
    def sumsquares(self):
        """weighted sum of squares of demeaned data"""
        return np.dot((self.demeaned ** 2).T, self.weights)

    # need memoize instead of cache decorator
    def var_ddof(self, ddof=0):
        """variance of data given ddof

        Parameters
        ----------
        ddof : int, float
            degrees of freedom correction, independent of attribute ddof

        Returns
        -------
        var : float, ndarray
            variance with denominator ``sum_weights - ddof``
        """
        return self.sumsquares / (self.sum_weights - ddof)

    def std_ddof(self, ddof=0):
        """standard deviation of data with given ddof

        Parameters
        ----------
        ddof : int, float
            degrees of freedom correction, independent of attribute ddof

        Returns
        -------
        std : float, ndarray
            standard deviation with denominator ``sum_weights - ddof``
        """
        return np.sqrt(self.var_ddof(ddof=ddof))

    @cache_readonly
    def var(self):
        """variance with default degrees of freedom correction

        Uses the ``ddof`` attribute, i.e. the denominator is
        ``sum_weights - ddof``.
        """
        return self.var_ddof(ddof=self.ddof)

    @cache_readonly
    def _var(self):
        """variance without degrees of freedom correction

        used for statistical tests with controlled ddof
        """
        return self.sumsquares / self.sum_weights

    @cache_readonly
    def std(self):
        """standard deviation with default degrees of freedom correction
        """
        return np.sqrt(self.var)

    @cache_readonly
    def cov(self):
        """weighted covariance of data if data is 2 dimensional

        assumes variables in columns and observations in rows
        uses default ddof
        """
        cov_ = np.dot(self.weights * self.demeaned.T, self.demeaned)
        cov_ /= self.sum_weights - self.ddof
        return cov_

    @cache_readonly
    def corrcoef(self):
        """weighted correlation with default ddof

        assumes variables in columns and observations in rows
        """
        # divide the covariance by the standard deviations along both axes
        return self.cov / self.std / self.std[:, None]

    @cache_readonly
    def std_mean(self):
        """standard deviation of weighted mean

        The result does not depend on the ``ddof`` attribute: the standard
        deviation is first rescaled to the ``ddof=0`` version and then divided
        by ``sqrt(sum_weights - 1)``.
        """
        std = self.std
        if self.ddof != 0:
            # ddof correction,   (need copy of std)
            # undo the user ddof so that std corresponds to ddof=0
            std = std * np.sqrt(
                (self.sum_weights - self.ddof) / self.sum_weights
            )

        return std / np.sqrt(self.sum_weights - 1)

    def quantile(self, probs, return_pandas=True):
        """
        Compute quantiles for a weighted sample.

        Parameters
        ----------
        probs : array_like
            A vector of probability points at which to calculate the
            quantiles.  Each element of `probs` should fall in [0, 1].
        return_pandas : bool
            If True, return value is a Pandas DataFrame or Series.
            Otherwise returns a ndarray.

        Returns
        -------
        quantiles : Series, DataFrame, or ndarray
            If `return_pandas` = True, returns one of the following:
              * data are 1d, `return_pandas` = True: a Series indexed by
                the probability points.
              * data are 2d, `return_pandas` = True: a DataFrame with
                the probability points as row index and the variables
                as column index.

            If `return_pandas` = False, returns an ndarray containing the
            same values as the Series/DataFrame.

        Notes
        -----
        To compute the quantiles, first, the weights are summed over
        exact ties yielding distinct data values y_1 < y_2 < ..., and
        corresponding weights w_1, w_2, ....  Let s_j denote the sum
        of the first j weights, and let W denote the sum of all the
        weights.  For a probability point p, if pW falls strictly
        between s_j and s_{j+1} then the estimated quantile is
        y_{j+1}.  If pW = s_j then the estimated quantile is (y_j +
        y_{j+1})/2.  If pW < s_1 then the estimated quantile is y_1.

        Integer (and boolean) data are converted to float before the
        quantiles are computed, so that midpoints between two adjacent
        values are not truncated.

        References
        ----------
        SAS documentation for weighted quantiles:

        https://support.sas.com/documentation/cdl/en/procstat/63104/HTML/default/viewer.htm#procstat_univariate_sect028.htm
        """

        import pandas as pd

        probs = np.asarray(probs)
        probs = np.atleast_1d(probs)

        if self.data.ndim == 1:
            rslt = self._quantile(self.data, probs)
            if return_pandas:
                rslt = pd.Series(rslt, index=probs)
        else:
            # one column of quantiles per variable (column of data)
            rslt = np.column_stack(
                [self._quantile(vec, probs) for vec in self.data.T]
            )
            if return_pandas:
                columns = ["col%d" % (j + 1) for j in range(rslt.shape[1])]
                rslt = pd.DataFrame(data=rslt, columns=columns, index=probs)

        if return_pandas:
            rslt.index.name = "p"

        return rslt

    def _quantile(self, vec, probs):
        # Helper function to calculate weighted quantiles for one column.
        # Follows definition from SAS documentation.
        # Returns ndarray

        import pandas as pd

        # Aggregate over ties
        df = pd.DataFrame(index=np.arange(len(self.weights)))
        df["weights"] = self.weights
        df["vec"] = vec
        dfg = df.groupby("vec").agg("sum")
        weights = dfg.values[:, 0]
        values = np.asarray(dfg.index)
        if values.dtype.kind in "biu":
            # Tie midpoints such as (2 + 3) / 2 are not integers; keeping an
            # integer array would silently truncate them on assignment below.
            values = values.astype(float)

        cweights = np.cumsum(weights)
        totwt = cweights[-1]
        targets = probs * totwt
        # first position whose cumulative weight is >= the target
        ii = np.searchsorted(cweights, targets)

        rslt = values[ii]

        # Exact hits
        jj = np.flatnonzero(np.abs(targets - cweights[ii]) < 1e-10)
        # the largest value has no right neighbor to average with
        jj = jj[ii[jj] < len(cweights) - 1]
        rslt[jj] = (values[ii[jj]] + values[ii[jj] + 1]) / 2

        return rslt

    def tconfint_mean(self, alpha=0.05, alternative="two-sided"):
        """confidence interval for weighted mean of data, t distribution

        The interval is two-sided by default; one-sided intervals are
        available through ``alternative``.
        If the data is 2d, then these are separate confidence intervals
        for each column.

        Parameters
        ----------
        alpha : float
            significance level for the confidence interval, coverage is
            ``1-alpha``
        alternative : str
            This specifies the alternative hypothesis for the test that
            corresponds to the confidence interval.
            The alternative hypothesis, H1, has to be one of the following

              'two-sided': H1: mean not equal to value (default)
              'larger' :   H1: mean larger than value
              'smaller' :  H1: mean smaller than value

        Returns
        -------
        lower, upper : floats or ndarrays
            lower and upper bound of confidence interval

        Notes
        -----
        The degrees of freedom are ``sum_weights - 1``.

        In a previous version, statsmodels 0.4, alpha was the confidence
        level, e.g. 0.95
        """
        # TODO: add asymmetric
        dof = self.sum_weights - 1
        ci = _tconfint_generic(
            self.mean, self.std_mean, dof, alpha, alternative
        )
        return ci

    def zconfint_mean(self, alpha=0.05, alternative="two-sided"):
        """confidence interval for weighted mean of data, normal distribution

        Confidence interval is based on normal distribution.
        The interval is two-sided by default; one-sided intervals are
        available through ``alternative``.
        If the data is 2d, then these are separate confidence intervals
        for each column.

        Parameters
        ----------
        alpha : float
            significance level for the confidence interval, coverage is
            ``1-alpha``
        alternative : str
            This specifies the alternative hypothesis for the test that
            corresponds to the confidence interval.
            The alternative hypothesis, H1, has to be one of the following

              'two-sided': H1: mean not equal to value (default)
              'larger' :   H1: mean larger than value
              'smaller' :  H1: mean smaller than value

        Returns
        -------
        lower, upper : floats or ndarrays
            lower and upper bound of confidence interval

        Notes
        -----
        In a previous version, statsmodels 0.4, alpha was the confidence
        level, e.g. 0.95
        """

        return _zconfint_generic(self.mean, self.std_mean, alpha, alternative)

    def ttest_mean(self, value=0, alternative="two-sided"):
        """ttest of Null hypothesis that mean is equal to value.

        The alternative hypothesis H1 is defined by the following

        - 'two-sided': H1: mean not equal to value
        - 'larger' :   H1: mean larger than value
        - 'smaller' :  H1: mean smaller than value

        Parameters
        ----------
        value : float or array
            the hypothesized value for the mean
        alternative : str
            The alternative hypothesis, H1, has to be one of the following:

              - 'two-sided': H1: mean not equal to value (default)
              - 'larger' :   H1: mean larger than value
              - 'smaller' :  H1: mean smaller than value

        Returns
        -------
        tstat : float
            test statistic
        pvalue : float
            pvalue of the t-test
        df : int or float
            degrees of freedom used in the test, ``sum_weights - 1``

        Raises
        ------
        ValueError
            If ``alternative`` is not one of the three options.

        """
        # TODO: check direction with R, smaller=less, larger=greater
        tstat = (self.mean - value) / self.std_mean
        dof = self.sum_weights - 1
        # TODO: use outsourced
        if alternative == "two-sided":
            pvalue = stats.t.sf(np.abs(tstat), dof) * 2
        elif alternative == "larger":
            pvalue = stats.t.sf(tstat, dof)
        elif alternative == "smaller":
            pvalue = stats.t.cdf(tstat, dof)
        else:
            raise ValueError("alternative not recognized")

        return tstat, pvalue, dof

    def ttost_mean(self, low, upp):
        """test of (non-)equivalence of one sample

        TOST: two one-sided t tests

        null hypothesis:  m < low or m > upp
        alternative hypothesis:  low < m < upp

        where m is the expected value of the sample (mean of the population).

        If the pvalue is smaller than a threshold, say 0.05, then we reject the
        hypothesis that the expected value of the sample (mean of the
        population) is outside of the interval given by thresholds low and upp.

        Parameters
        ----------
        low, upp : float
            equivalence interval low < mean < upp

        Returns
        -------
        pvalue : float
            pvalue of the non-equivalence test, the larger of the two
            one-sided p-values
        t1, pv1, df1 : tuple
            test statistic, pvalue and degrees of freedom for lower threshold
            test
        t2, pv2, df2 : tuple
            test statistic, pvalue and degrees of freedom for upper threshold
            test

        """

        t1, pv1, df1 = self.ttest_mean(low, alternative="larger")
        t2, pv2, df2 = self.ttest_mean(upp, alternative="smaller")
        return np.maximum(pv1, pv2), (t1, pv1, df1), (t2, pv2, df2)

    def ztest_mean(self, value=0, alternative="two-sided"):
        """z-test of Null hypothesis that mean is equal to value.

        The alternative hypothesis H1 is defined by the following

        - 'two-sided': H1: mean not equal to value
        - 'larger' :   H1: mean larger than value
        - 'smaller' :  H1: mean smaller than value

        Parameters
        ----------
        value : float or array
            the hypothesized value for the mean
        alternative : str
            The alternative hypothesis, H1, has to be one of the following

              'two-sided': H1: mean not equal to value (default)
              'larger' :   H1: mean larger than value
              'smaller' :  H1: mean smaller than value

        Returns
        -------
        tstat : float
            test statistic
        pvalue : float
            pvalue of the z-test

        Raises
        ------
        ValueError
            If ``alternative`` is not one of the three options.

        Notes
        -----
        This uses the same degrees of freedom correction as the t-test in the
        calculation of the standard error of the mean, i.e it uses
        `(sum_weights - 1)` instead of `sum_weights` in the denominator.
        See Examples below for the difference.

        Examples
        --------

        z-test on a proportion, with 20 observations, 15 of those are our event

        >>> import statsmodels.api as sm
        >>> x1 = [0, 1]
        >>> w1 = [5, 15]
        >>> d1 = sm.stats.DescrStatsW(x1, w1)
        >>> d1.ztest_mean(0.5)
        (2.5166114784235836, 0.011848940928347452)

        This differs from the proportions_ztest because of the degrees of
        freedom correction:

        >>> sm.stats.proportions_ztest(15, 20.0, value=0.5)
        (2.5819888974716112, 0.009823274507519247)

        We can replicate the results from ``proportions_ztest`` if we increase
        the weights to have artificially one more observation:

        >>> sm.stats.DescrStatsW(x1, np.array(w1)*21./20).ztest_mean(0.5)
        (2.5819888974716116, 0.0098232745075192366)
        """
        tstat = (self.mean - value) / self.std_mean
        # TODO: use outsourced
        if alternative == "two-sided":
            pvalue = stats.norm.sf(np.abs(tstat)) * 2
        elif alternative == "larger":
            pvalue = stats.norm.sf(tstat)
        elif alternative == "smaller":
            pvalue = stats.norm.cdf(tstat)
        else:
            # same error as ttest_mean instead of an UnboundLocalError below
            raise ValueError("alternative not recognized")

        return tstat, pvalue

    def ztost_mean(self, low, upp):
        """test of (non-)equivalence of one sample, based on z-test

        TOST: two one-sided z-tests

        null hypothesis:  m < low or m > upp
        alternative hypothesis:  low < m < upp

        where m is the expected value of the sample (mean of the population).

        If the pvalue is smaller than a threshold, say 0.05, then we reject the
        hypothesis that the expected value of the sample (mean of the
        population) is outside of the interval given by thresholds low and upp.

        Parameters
        ----------
        low, upp : float
            equivalence interval low < mean < upp

        Returns
        -------
        pvalue : float
            pvalue of the non-equivalence test, the larger of the two
            one-sided p-values
        t1, pv1 : tuple
            test statistic and p-value for lower threshold test
        t2, pv2 : tuple
            test statistic and p-value for upper threshold test

        """

        t1, pv1 = self.ztest_mean(low, alternative="larger")
        t2, pv2 = self.ztest_mean(upp, alternative="smaller")
        return np.maximum(pv1, pv2), (t1, pv1), (t2, pv2)

    def get_compare(self, other, weights=None):
        """return an instance of CompareMeans with self and other

        Parameters
        ----------
        other : array_like or instance of DescrStatsW
            If array_like then this creates an instance of DescrStatsW with
            the given weights.
        weights : None or array
            weights are only used if other is not an instance of DescrStatsW

        Returns
        -------
        cm : instance of CompareMeans
            the instance has self attached as d1 and other as d2.

        See Also
        --------
        CompareMeans

        """
        if not isinstance(other, self.__class__):
            d2 = DescrStatsW(other, weights)
        else:
            d2 = other
        return CompareMeans(self, d2)

    def asrepeats(self):
        """get array that has repeats given by floor(weights)

        observations with weight=0 are dropped

        """
        w_int = np.floor(self.weights).astype(int)
        return np.repeat(self.data, w_int, axis=0)

def _tstat_generic(value1, value2, std_diff, dof, alternative, diff=0):
    """t-test computed from summary statistics

    The statistic is ``(value1 - value2 - diff) / std_diff`` and its p-value
    is taken from a t distribution with ``dof`` degrees of freedom.

    Parameters
    ----------
    value1 : float or ndarray
        Estimate, for example the mean, for the first sample.
    value2 : float or ndarray
        Estimate, for example the mean, for the second sample.
    std_diff : float or ndarray
        Standard error of ``value1 - value2``.
    dof : int or float
        Degrees of freedom of the t distribution.
    alternative : str
        Alternative hypothesis H1, one of

           * 'two-sided', '2-sided', '2s' : ``value1 - value2 - diff != 0``
           * 'larger', 'l' :  ``value1 - value2 - diff > 0``
           * 'smaller', 's' : ``value1 - value2 - diff < 0``

    diff : float
        Value of ``value1 - value2`` under the null hypothesis.

    Returns
    -------
    tstat : float or ndarray
        Test statistic.
    pvalue : float or ndarray
        P-value based on the t distribution with ``dof`` degrees of freedom.

    Raises
    ------
    ValueError
        If ``alternative`` is none of the accepted spellings.
    """
    statistic = (value1 - value2 - diff) / std_diff

    if alternative in ("two-sided", "2-sided", "2s"):
        # both tails: twice the upper tail beyond |statistic|
        return statistic, stats.t.sf(np.abs(statistic), dof) * 2
    if alternative in ("larger", "l"):
        return statistic, stats.t.sf(statistic, dof)
    if alternative in ("smaller", "s"):
        return statistic, stats.t.cdf(statistic, dof)
    raise ValueError("invalid alternative")


def _tconfint_generic(mean, std_mean, dof, alpha, alternative):
    """confidence interval from summary statistics, t distribution

    Parameters
    ----------
    mean : float or ndarray
        Point estimate, for example a sample mean.
    std_mean : float or ndarray
        Standard error of the estimate ``mean``.
    dof : int or float
        Degrees of freedom of the t distribution.
    alpha : float
        Significance level for the confidence interval, coverage is
        ``1-alpha``.
    alternative : str
        Alternative hypothesis of the corresponding test, one of

           * 'two-sided', '2-sided', '2s' : interval bounded on both sides
           * 'larger', 'l' :  only a lower bound, upper limit is inf
           * 'smaller', 's' : only an upper bound, lower limit is -inf

    Returns
    -------
    lower : float or ndarray
        Lower confidence limit. This is -inf for the one-sided alternative
        "smaller".
    upper : float or ndarray
        Upper confidence limit. This is inf for the one-sided alternative
        "larger".

    Raises
    ------
    ValueError
        If ``alternative`` is none of the accepted spellings.
    """
    if alternative in ("two-sided", "2-sided", "2s"):
        # symmetric interval, alpha is split evenly between both tails
        margin = stats.t.ppf(1 - alpha / 2.0, dof) * std_mean
        return mean - margin, mean + margin

    if alternative in ("larger", "l"):
        # ppf(alpha) is the lower-tail quantile, so it is added to the mean
        return mean + stats.t.ppf(alpha, dof) * std_mean, np.inf

    if alternative in ("smaller", "s"):
        return -np.inf, mean + stats.t.ppf(1 - alpha, dof) * std_mean

    raise ValueError("invalid alternative")


def _zstat_generic(value1, value2, std_diff, alternative, diff=0):
    """z-test computed from summary statistics

    The statistic is ``(value1 - value2 - diff) / std_diff`` and its p-value
    is taken from the standard normal distribution.

    Parameters
    ----------
    value1 : float or ndarray
        Estimate, for example the mean, for the first sample.
    value2 : float or ndarray
        Estimate, for example the mean, for the second sample.
    std_diff : float or ndarray
        Standard error of ``value1 - value2``.
    alternative : str
        Alternative hypothesis H1, one of

           * 'two-sided', '2-sided', '2s' : ``value1 - value2 - diff != 0``
           * 'larger', 'l' :  ``value1 - value2 - diff > 0``
           * 'smaller', 's' : ``value1 - value2 - diff < 0``

    diff : float
        Value of ``value1 - value2`` under the null hypothesis.

    Returns
    -------
    zstat : float or ndarray
        Test statistic.
    pvalue : float or ndarray
        P-value based on the standard normal distribution.

    Raises
    ------
    ValueError
        If ``alternative`` is none of the accepted spellings.
    """
    statistic = (value1 - value2 - diff) / std_diff

    if alternative in ("two-sided", "2-sided", "2s"):
        # both tails: twice the upper tail beyond |statistic|
        return statistic, stats.norm.sf(np.abs(statistic)) * 2
    if alternative in ("larger", "l"):
        return statistic, stats.norm.sf(statistic)
    if alternative in ("smaller", "s"):
        return statistic, stats.norm.cdf(statistic)
    raise ValueError("invalid alternative")


def _zstat_generic2(value, std, alternative):
    """z-test for a single statistic given its standard error

    The statistic is ``value / std`` and its p-value is taken from the
    standard normal distribution.

    Parameters
    ----------
    value : float or ndarray
        Value of a sample statistic, for example mean.
    std : float or ndarray
        Standard error of the sample statistic value.
    alternative : str
        Alternative hypothesis H1, one of

           * 'two-sided', '2-sided', '2s' : ``value`` not equal to 0
           * 'larger', 'l' :  ``value > 0``
           * 'smaller', 's' : ``value < 0``

    Returns
    -------
    zstat : float or ndarray
        Test statistic.
    pvalue : float or ndarray
        P-value of the hypothesis test assuming that the test statistic is
        normally distributed.

    Raises
    ------
    ValueError
        If ``alternative`` is none of the accepted spellings.
    """
    ratio = value / std

    if alternative in ("two-sided", "2-sided", "2s"):
        # both tails: twice the upper tail beyond |ratio|
        return ratio, stats.norm.sf(np.abs(ratio)) * 2
    if alternative in ("larger", "l"):
        return ratio, stats.norm.sf(ratio)
    if alternative in ("smaller", "s"):
        return ratio, stats.norm.cdf(ratio)
    raise ValueError("invalid alternative")


def _zconfint_generic(mean, std_mean, alpha, alternative):
    """confidence interval from summary statistics, normal distribution

    Parameters
    ----------
    mean : float or ndarray
        Point estimate, for example a sample mean.
    std_mean : float or ndarray
        Standard error of the estimate ``mean``.
    alpha : float
        Significance level for the confidence interval, coverage is
        ``1-alpha``
    alternative : str
        Alternative hypothesis of the corresponding test, one of

           * 'two-sided', '2-sided', '2s' : interval bounded on both sides
           * 'larger', 'l' :  only a lower bound, upper limit is inf
           * 'smaller', 's' : only an upper bound, lower limit is -inf

    Returns
    -------
    lower : float or ndarray
        Lower confidence limit. This is -inf for the one-sided alternative
        "smaller".
    upper : float or ndarray
        Upper confidence limit. This is inf for the one-sided alternative
        "larger".

    Raises
    ------
    ValueError
        If ``alternative`` is none of the accepted spellings.
    """
    if alternative in ("two-sided", "2-sided", "2s"):
        # symmetric interval, alpha is split evenly between both tails
        margin = stats.norm.ppf(1 - alpha / 2.0) * std_mean
        return mean - margin, mean + margin

    if alternative in ("larger", "l"):
        # ppf(alpha) is the lower-tail quantile, so it is added to the mean
        return mean + stats.norm.ppf(alpha) * std_mean, np.inf

    if alternative in ("smaller", "s"):
        return -np.inf, mean + stats.norm.ppf(1 - alpha) * std_mean

    raise ValueError("invalid alternative")


class CompareMeans:
    """class for two sample comparison

    The tests and the confidence interval work for multi-endpoint comparison:
    If d1 and d2 have the same number of columns, then each column of the data
    in d1 is compared with the corresponding column in d2.

    Parameters
    ----------
    d1, d2 : instances of DescrStatsW

    Notes
    -----
    The result for the statistical tests and the confidence interval are
    independent of the user specified ddof.

    TODO: Extend to any number of groups or write a version that works in that
    case, like in SAS and SPSS.

    """

    def __init__(self, d1, d2):
        """assume d1, d2 hold the relevant attributes

        """
        self.d1 = d1
        self.d2 = d2
        # assume nobs is available

    #        if not hasattr(self.d1, 'nobs'):
    #            d1.nobs1 = d1.sum_weights.astype(float)  #float just to make sure
    #        self.nobs2 = d2.sum_weights.astype(float)

    @classmethod
    def from_data(
        cls, data1, data2, weights1=None, weights2=None, ddof1=0, ddof2=0
    ):
        """Create a CompareMeans instance directly from two datasets

        Each dataset is wrapped in a ``DescrStatsW`` instance together with
        its weights and ddof.

        Parameters
        ----------
        data1, data2 : array_like, 1-D or 2-D
            compared datasets
        weights1, weights2 : None or 1-D ndarray
            weights for each observation of data1 and data2 respectively,
            with same length as zero axis of corresponding dataset.
        ddof1, ddof2 : int
            default ddof1=0, ddof2=0, degrees of freedom for data1,
            data2 respectively.

        Returns
        -------
        A CompareMeans instance.

        """
        descr1 = DescrStatsW(data1, weights=weights1, ddof=ddof1)
        descr2 = DescrStatsW(data2, weights=weights2, ddof=ddof2)
        return cls(descr1, descr2)

    def summary(self, use_t=True, alpha=0.05, usevar="pooled", value=0):
        """summarize the results of the hypothesis test

        The table reports, for each column of the data, the estimate
        ``mean1 - mean2 - value``, its standard error, the test statistic,
        the p-value and the confidence interval of the estimate.

        Parameters
        ----------
        use_t : bool, optional
            if use_t is True, then t test results are returned
            if use_t is False, then z test results are returned
        alpha : float
            significance level for the confidence interval, coverage is
            ``1-alpha``
        usevar : str, 'pooled' or 'unequal'
            If ``pooled``, then the standard deviation of the samples is
            assumed to be the same. If ``unequal``, then the variance of
            Welch ttest will be used, and the degrees of freedom are those
            of Satterthwaite if ``use_t`` is True.
        value : float
            difference between the means under the Null hypothesis.

        Returns
        -------
        smry : SimpleTable

        Raises
        ------
        ValueError
            If ``usevar`` is neither "pooled" nor "unequal".

        """

        d1 = self.d1
        d2 = self.d2

        if use_t:
            tstat, pvalue, _ = self.ttest_ind(usevar=usevar, value=value)
            lower, upper = self.tconfint_diff(alpha=alpha, usevar=usevar)
        else:
            tstat, pvalue = self.ztest_ind(usevar=usevar, value=value)
            lower, upper = self.zconfint_diff(alpha=alpha, usevar=usevar)

        if usevar == "pooled":
            std_err = self.std_meandiff_pooledvar
        else:
            std_err = self.std_meandiff_separatevar

        std_err = np.atleast_1d(std_err)
        tstat = np.atleast_1d(tstat)
        pvalue = np.atleast_1d(pvalue)
        # The confidence interval methods are centered at ``mean1 - mean2``,
        # while the reported estimate is ``mean1 - mean2 - value``. Shift the
        # limits by ``value`` so that the interval belongs to the estimate
        # shown in the same row.
        lower = np.atleast_1d(lower) - value
        upper = np.atleast_1d(upper) - value
        conf_int = np.column_stack((lower, upper))
        params = np.atleast_1d(d1.mean - d2.mean - value)

        title = "Test for equality of means"
        yname = "y"  # not used in params_frame
        xname = ["subset #%d" % (ii + 1) for ii in range(tstat.shape[0])]

        # imported here and not at module level, as in the original code
        from statsmodels.iolib.summary import summary_params

        return summary_params(
            (None, params, std_err, tstat, pvalue, conf_int),
            alpha=alpha,
            use_t=use_t,
            yname=yname,
            xname=xname,
            title=title,
        )

    @cache_readonly
    def std_meandiff_separatevar(self):
        """standard error of the mean difference with separate variances

        """
        # ``_var`` is used so that the formula starts from the variance with
        # ddof=0, the correction by one is applied explicitly below
        var_mean1 = self.d1._var / (self.d1.nobs - 1)
        var_mean2 = self.d2._var / (self.d2.nobs - 1)
        return np.sqrt(var_mean1 + var_mean2)

    @cache_readonly
    def std_meandiff_pooledvar(self):
        """standard error of the mean difference assuming equal variance

        The variance is pooled over both data sets.

        """
        first, second = self.d1, self.d2
        # the pooled degrees of freedom use a fixed correction of one per
        # sample and not the ddof attributes of the two samples
        dof_pooled = first.nobs - 1 + second.nobs - 1
        # could make var_pooled into attribute
        var_pooled = (first.sumsquares + second.sumsquares) / dof_pooled
        inv_nobs = 1.0 / first.nobs + 1.0 / second.nobs
        return np.sqrt(var_pooled * inv_nobs)

    def dof_satt(self):
        """degrees of freedom of Satterthwaite for unequal variance
        """
        d1 = self.d1
        d2 = self.d2
        # this follows blindly the SPSS manual
        # except I use  ``_var`` which has ddof=0
        sem1 = d1._var / (d1.nobs - 1)
        sem2 = d2._var / (d2.nobs - 1)
        semsum = sem1 + sem2
        z1 = (sem1 / semsum) ** 2 / (d1.nobs - 1)
        z2 = (sem2 / semsum) ** 2 / (d2.nobs - 1)
        dof = 1.0 / (z1 + z2)
        return dof

    def ttest_ind(self, alternative="two-sided", usevar="pooled", value=0):
        """t-test for the null hypothesis of identical means

        The two samples are the ones stored in ``d1`` and ``d2``.
        This should also be the same as onewaygls, except for ddof
        differences.

        Parameters
        ----------
        alternative : str
            The alternative hypothesis, H1, has to be one of the following

            * 'two-sided': H1: difference in means not equal to value
              (default)
            * 'larger' :   H1: difference in means larger than value
            * 'smaller' :  H1: difference in means smaller than value

        usevar : str, 'pooled' or 'unequal'
            If ``pooled``, then the standard deviation of the samples is
            assumed to be the same. If ``unequal``, then Welch ttest with
            Satterthwaite degrees of freedom is used
        value : float
            difference between the means under the Null hypothesis.

        Returns
        -------
        tstat : float
            test statistic
        pvalue : float
            pvalue of the t-test
        df : int or float
            degrees of freedom used in the t-test

        Raises
        ------
        ValueError
            If ``usevar`` is neither "pooled" nor "unequal".

        Notes
        -----
        The result is independent of the user specified ddof.

        """
        first, second = self.d1, self.d2

        if usevar not in ("pooled", "unequal"):
            raise ValueError('usevar can only be "pooled" or "unequal"')

        if usevar == "pooled":
            std_diff = self.std_meandiff_pooledvar
            dof = first.nobs - 1 + second.nobs - 1
        else:
            std_diff = self.std_meandiff_separatevar
            dof = self.dof_satt()

        tstat, pvalue = _tstat_generic(
            first.mean, second.mean, std_diff, dof, alternative, diff=value
        )
        return tstat, pvalue, dof

    def ztest_ind(self, alternative="two-sided", usevar="pooled", value=0):
        """z-test for the null hypothesis of identical means

        The two samples are the ones stored in ``d1`` and ``d2``.

        Parameters
        ----------
        alternative : str
            The alternative hypothesis, H1, has to be one of the following

            * 'two-sided': H1: difference in means not equal to value
              (default)
            * 'larger' :   H1: difference in means larger than value
            * 'smaller' :  H1: difference in means smaller than value

        usevar : str, 'pooled' or 'unequal'
            If ``pooled``, then the standard deviation of the samples is
            assumed to be the same. If ``unequal``, then the standard
            deviations of the samples may be different.
        value : float
            difference between the means under the Null hypothesis.

        Returns
        -------
        tstat : float
            test statistic
        pvalue : float
            pvalue of the z-test

        Raises
        ------
        ValueError
            If ``usevar`` is neither "pooled" nor "unequal".

        """
        first, second = self.d1, self.d2

        if usevar not in ("pooled", "unequal"):
            raise ValueError('usevar can only be "pooled" or "unequal"')

        if usevar == "pooled":
            std_diff = self.std_meandiff_pooledvar
        else:
            std_diff = self.std_meandiff_separatevar

        zstat, pvalue = _zstat_generic(
            first.mean, second.mean, std_diff, alternative, diff=value
        )
        return zstat, pvalue

    def tconfint_diff(
        self, alpha=0.05, alternative="two-sided", usevar="pooled"
    ):
        """confidence interval for the difference in means, t distribution

        The interval is built around ``d1.mean - d2.mean``.

        Parameters
        ----------
        alpha : float
            significance level for the confidence interval, coverage is
            ``1-alpha``
        alternative : str
            This specifies the alternative hypothesis for the test that
            corresponds to the confidence interval.
            The alternative hypothesis, H1, has to be one of the following :

            * 'two-sided': H1: difference in means not equal to value
              (default)
            * 'larger' :   H1: difference in means larger than value
            * 'smaller' :  H1: difference in means smaller than value

        usevar : str, 'pooled' or 'unequal'
            If ``pooled``, then the standard deviation of the samples is
            assumed to be the same. If ``unequal``, then Welch ttest with
            Satterthwaite degrees of freedom is used

        Returns
        -------
        lower, upper : floats
            lower and upper limits of the confidence interval

        Raises
        ------
        ValueError
            If ``usevar`` is neither "pooled" nor "unequal".

        Notes
        -----
        The result is independent of the user specified ddof.

        """
        first, second = self.d1, self.d2
        mean_diff = first.mean - second.mean

        if usevar not in ("pooled", "unequal"):
            raise ValueError('usevar can only be "pooled" or "unequal"')

        if usevar == "pooled":
            std_diff = self.std_meandiff_pooledvar
            dof = first.nobs - 1 + second.nobs - 1
        else:
            std_diff = self.std_meandiff_separatevar
            dof = self.dof_satt()

        return _tconfint_generic(
            mean_diff, std_diff, dof, alpha=alpha, alternative=alternative
        )

    def zconfint_diff(
        self, alpha=0.05, alternative="two-sided", usevar="pooled"
    ):
        """confidence interval for the difference in means, normal distribution

        The interval is built around ``d1.mean - d2.mean``.

        Parameters
        ----------
        alpha : float
            significance level for the confidence interval, coverage is
            ``1-alpha``
        alternative : str
            This specifies the alternative hypothesis for the test that
            corresponds to the confidence interval.
            The alternative hypothesis, H1, has to be one of the following :

            * 'two-sided': H1: difference in means not equal to value
              (default)
            * 'larger' :   H1: difference in means larger than value
            * 'smaller' :  H1: difference in means smaller than value

        usevar : str, 'pooled' or 'unequal'
            If ``pooled``, then the standard deviation of the samples is
            assumed to be the same. If ``unequal``, then the standard
            deviations of the samples may be different.

        Returns
        -------
        lower, upper : floats
            lower and upper limits of the confidence interval

        Raises
        ------
        ValueError
            If ``usevar`` is neither "pooled" nor "unequal".

        Notes
        -----
        The result is independent of the user specified ddof.

        """
        first, second = self.d1, self.d2
        mean_diff = first.mean - second.mean

        if usevar not in ("pooled", "unequal"):
            raise ValueError('usevar can only be "pooled" or "unequal"')

        if usevar == "pooled":
            std_diff = self.std_meandiff_pooledvar
        else:
            std_diff = self.std_meandiff_separatevar

        return _zconfint_generic(
            mean_diff, std_diff, alpha=alpha, alternative=alternative
        )

    def ttost_ind(self, low, upp, usevar="pooled"):
        """
        test of equivalence for two independent samples, base on t-test

        Parameters
        ----------
        low, upp : float
            equivalence interval low < m1 - m2 < upp
        usevar : str, 'pooled' or 'unequal'
            If ``pooled``, then the standard deviation of the samples is assumed to be
            the same. If ``unequal``, then Welch ttest with Satterthwait degrees
            of freedom is used

        Returns
        -------
        pvalue : float
            pvalue of the non-equivalence test
        t1, pv1 : tuple of floats
            test statistic and pvalue for lower threshold test
        t2, pv2 : tuple of floats
            test statistic and pvalue for upper threshold test
        """
        tt1 = self.ttest_ind(alternative="larger", usevar=usevar, value=low)
        tt2 = self.ttest_ind(alternative="smaller", usevar=usevar, value=upp)
        # TODO: remove tuple return, use same as for function tost_ind
        return np.maximum(tt1[1], tt2[1]), (tt1, tt2)

    def ztost_ind(self, low, upp, usevar="pooled"):
        """
        test of equivalence for two independent samples, based on z-test

        Parameters
        ----------
        low, upp : float
            equivalence interval low < m1 - m2 < upp
        usevar : str, 'pooled' or 'unequal'
            If ``pooled``, then the standard deviation of the samples is assumed to be
            the same. If ``unequal``, then Welch ttest with Satterthwait degrees
            of freedom is used

        Returns
        -------
        pvalue : float
            pvalue of the non-equivalence test
        t1, pv1 : tuple of floats
            test statistic and pvalue for lower threshold test
        t2, pv2 : tuple of floats
            test statistic and pvalue for upper threshold test
        """
        tt1 = self.ztest_ind(alternative="larger", usevar=usevar, value=low)
        tt2 = self.ztest_ind(alternative="smaller", usevar=usevar, value=upp)
        # TODO: remove tuple return, use same as for function tost_ind
        return np.maximum(tt1[1], tt2[1]), tt1, tt2

    # tost.__doc__ = tost_ind.__doc__


# does not work for 2d, does not take weights into account
##    def test_equal_var(self):
##        """Levene test for independence
##
##        """
##        d1 = self.d1
##        d2 = self.d2
##        #rewrite this, for now just use scipy.stats
##        return stats.levene(d1.data, d2.data)


def ttest_ind(
    x1,
    x2,
    alternative="two-sided",
    usevar="pooled",
    weights=(None, None),
    value=0,
):
    """t-test for two independent samples

    Convenience function that builds a ``CompareMeans`` instance from the
    data, calls its ``ttest_ind`` method and discards the intermediate
    objects.
    Compared to scipy stats: drops axis option, adds alternative, usevar, and
    weights option.

    Parameters
    ----------
    x1 : array_like, 1-D or 2-D
        first of the two independent samples
    x2 : array_like, 1-D or 2-D
        second of the two independent samples
    alternative : str
        The alternative hypothesis, H1, has to be one of the following

           * 'two-sided' (default): H1: difference in means not equal to value
           * 'larger' :   H1: difference in means larger than value
           * 'smaller' :  H1: difference in means smaller than value

    usevar : str, 'pooled' or 'unequal'
        If ``pooled``, then the standard deviation of the samples is assumed
        to be the same. If ``unequal``, then Welch ttest with Satterthwaite
        degrees of freedom is used
    weights : tuple of None or ndarrays
        Case weights for the two samples, the first element is used for
        ``x1`` and the second for ``x2``. For details on weights see
        ``DescrStatsW``
    value : float
        difference between the means under the Null hypothesis.


    Returns
    -------
    tstat : float
        test statistic
    pvalue : float
        pvalue of the t-test
    df : int or float
        degrees of freedom used in the t-test

    """
    descr1 = DescrStatsW(x1, weights=weights[0], ddof=0)
    descr2 = DescrStatsW(x2, weights=weights[1], ddof=0)
    comparison = CompareMeans(descr1, descr2)

    tstat, pvalue, dof = comparison.ttest_ind(
        alternative=alternative, usevar=usevar, value=value
    )
    return tstat, pvalue, dof


def ttost_ind(
    x1, x2, low, upp, usevar="pooled", weights=(None, None), transform=None
):
    """test of (non-)equivalence for two independent samples

    TOST: two one-sided t tests

    null hypothesis:  m1 - m2 < low or m1 - m2 > upp
    alternative hypothesis:  low < m1 - m2 < upp

    where m1, m2 are the means, expected values of the two samples.

    If the pvalue is smaller than a threshold, say 0.05, then we reject the
    hypothesis that the difference between the two samples is larger than
    the thresholds given by low and upp.

    Parameters
    ----------
    x1 : array_like, 1-D or 2-D
        first of the two independent samples, see notes for 2-D case
    x2 : array_like, 1-D or 2-D
        second of the two independent samples, see notes for 2-D case
    low, upp : float
        equivalence interval low < m1 - m2 < upp
    usevar : str, 'pooled' or 'unequal'
        If ``pooled``, then the standard deviation of the samples is assumed
        to be the same. If ``unequal``, then Welch ttest with Satterthwaite
        degrees of freedom is used
    weights : tuple of None or ndarrays
        Case weights for the two samples. For details on weights see
        ``DescrStatsW``
    transform : None or function
        If None (default), then the data is not transformed. Given a function,
        sample data and thresholds are transformed. If transform is log, then
        the equivalence interval is in ratio: low < m1 / m2 < upp

    Returns
    -------
    pvalue : float
        pvalue of the non-equivalence test
    t1, pv1, df1 : tuple
        test statistic, pvalue and degrees of freedom for lower threshold
        test
    t2, pv2, df2 : tuple
        test statistic, pvalue and degrees of freedom for upper threshold
        test

    Notes
    -----
    The test rejects if the 2*alpha confidence interval for the difference
    is contained in the ``(low, upp)`` interval.

    This test works also for multi-endpoint comparisons: If x1 and x2
    have the same number of columns, then each column of the data in x1 is
    compared with the corresponding column in x2. This is the same as
    comparing each of the corresponding columns separately. Currently no
    multi-comparison correction is used. The raw p-values reported here can
    be corrected with the functions in ``multitest``.

    """
    if transform:
        if transform is np.log:
            # the log can be applied to each sample separately, no stacking
            # is needed in this special case
            x1, x2 = transform(x1), transform(x2)
        else:
            # transforms like rankdata need both datasets at the same time,
            # concatenate works for stacking 1d and 2d arrays
            stacked = transform(np.concatenate((x1, x2), 0))
            nobs1 = len(x1)
            x1, x2 = stacked[:nobs1], stacked[nobs1:]
        # the thresholds have to be on the same scale as the data
        low, upp = transform(low), transform(upp)

    descr1 = DescrStatsW(x1, weights=weights[0], ddof=0)
    descr2 = DescrStatsW(x2, weights=weights[1], ddof=0)
    pvalue, results = CompareMeans(descr1, descr2).ttost_ind(
        low, upp, usevar=usevar
    )
    return pvalue, results[0], results[1]


def ttost_paired(x1, x2, low, upp, transform=None, weights=None):
    """test of (non-)equivalence for two dependent, paired samples

    TOST: two one-sided t tests

    null hypothesis:  md < low or md > upp
    alternative hypothesis:  low < md < upp

    where md is the mean, expected value of the difference x1 - x2

    If the pvalue is smaller than a threshold, say 0.05, then we reject the
    hypothesis that the difference between the two samples is larger than
    the thresholds given by low and upp.

    Parameters
    ----------
    x1 : array_like
        first of the two paired samples
    x2 : array_like
        second of the two paired samples
    low, upp : float
        equivalence interval low < mean of difference < upp
    weights : None or ndarray
        case weights, they are applied to the differences ``x1 - x2``.
        For details on weights see ``DescrStatsW``
    transform : None or function
        If None (default), then the data is not transformed. Given a function,
        sample data and thresholds are transformed. If transform is log, then
        the equivalence interval is in ratio: low < x1 / x2 < upp

    Returns
    -------
    pvalue : float
        pvalue of the non-equivalence test
    t1, pv1, df1 : tuple
        test statistic, pvalue and degrees of freedom for lower threshold test
    t2, pv2, df2 : tuple
        test statistic, pvalue and degrees of freedom for upper threshold test

    """
    if transform:
        if transform is np.log:
            # the log can be applied to each sample separately, no stacking
            # is needed in this special case
            x1, x2 = transform(x1), transform(x2)
        else:
            # transforms like rankdata need both datasets at the same time,
            # concatenate works for stacking 1d and 2d arrays
            stacked = transform(np.concatenate((x1, x2), 0))
            nobs1 = len(x1)
            x1, x2 = stacked[:nobs1], stacked[nobs1:]
        # the thresholds have to be on the same scale as the data
        low, upp = transform(low), transform(upp)

    # the paired test is a one sample test on the differences
    diff_stats = DescrStatsW(x1 - x2, weights=weights, ddof=0)
    t_low, pv_low, df_low = diff_stats.ttest_mean(low, alternative="larger")
    t_upp, pv_upp, df_upp = diff_stats.ttest_mean(upp, alternative="smaller")
    pvalue = np.maximum(pv_low, pv_upp)
    return pvalue, (t_low, pv_low, df_low), (t_upp, pv_upp, df_upp)


def ztest(
    x1, x2=None, value=0, alternative="two-sided", usevar="pooled", ddof=1.0
):
    """test for mean based on normal distribution, one or two samples

    In the case of two samples, the samples are assumed to be independent.

    Parameters
    ----------
    x1 : array_like, 1-D or 2-D
        first of the two independent samples
    x2 : array_like, 1-D or 2-D, optional
        second of the two independent samples. If None (default), then a
        one sample test is performed.
    value : float
        In the one sample case, value is the mean of x1 under the Null
        hypothesis.
        In the two sample case, value is the difference between mean of x1 and
        mean of x2 under the Null hypothesis. The test statistic is
        `x1_mean - x2_mean - value`.
    alternative : str
        The alternative hypothesis, H1, has to be one of the following

           * 'two-sided': H1: difference in means not equal to value (default)
           * 'larger' :   H1: difference in means larger than value
           * 'smaller' :  H1: difference in means smaller than value

    usevar : str, 'pooled' or 'unequal'
        If ``pooled``, then the standard deviation of the samples is assumed
        to be the same. If ``unequal``, then the standard deviation of the
        sample is assumed to be different. Only used in the two sample case.
    ddof : int
        Degrees of freedom use in the calculation of the variance of the mean
        estimate. In the case of comparing means this is one, however it can
        be adjusted for testing other statistics (proportion, correlation)

    Returns
    -------
    tstat : float
        test statistic
    pvalue : float
        pvalue of the z-test

    Raises
    ------
    NotImplementedError
        If ``usevar`` is neither "pooled" nor "unequal".

    """
    # TODO: this should delegate to CompareMeans like ttest_ind
    #       However that does not implement ddof

    if usevar not in {"pooled", "unequal"}:
        raise NotImplementedError('usevar can only be "pooled" or "unequal"')

    sample1 = np.asarray(x1)
    n1 = sample1.shape[0]
    mean1 = sample1.mean(0)
    var1 = sample1.var(0)

    if x2 is None:
        # one sample case, the mean is compared to ``value`` directly
        mean2 = 0
        var_diff = var1 / (n1 - ddof)
    else:
        sample2 = np.asarray(x2)
        n2 = sample2.shape[0]
        mean2 = sample2.mean(0)
        var2 = sample2.var(0)
        if usevar == "pooled":
            # sum of squared deviations of both samples, then scaled to the
            # variance of the difference in means
            var_diff = n1 * var1 + n2 * var2
            var_diff /= n1 + n2 - 2 * ddof
            var_diff *= 1.0 / n1 + 1.0 / n2
        elif usevar == "unequal":
            var_diff = var1 / (n1 - ddof) + var2 / (n2 - ddof)

    # the statistic is mean1 - mean2 - value divided by its standard error
    return _zstat_generic(
        mean1, mean2, np.sqrt(var_diff), alternative, diff=value
    )

def zconfint(
    x1,
    x2=None,
    value=0,
    alpha=0.05,
    alternative="two-sided",
    usevar="pooled",
    ddof=1.0,
):
    """confidence interval based on normal distribution z-test

    Parameters
    ----------
    x1 : array_like, 1-D or 2-D
        first of the two independent samples
    x2 : array_like, 1-D or 2-D, optional
        second of the two independent samples. If None (default), then the
        interval is for the mean of ``x1``.
    value : float
        In the one sample case, value is the mean of x1 under the Null
        hypothesis.
        In the two sample case, value is the difference between mean of x1 and
        mean of x2 under the Null hypothesis. The test statistic is
        `x1_mean - x2_mean - value`.
    alpha : float
        Significance level for the confidence interval, coverage is
        ``1-alpha``
    alternative : str
        Alternative hypothesis of the test that corresponds to the
        confidence interval, one of 'two-sided', 'larger' or 'smaller'.
    usevar : str, 'pooled'
        Currently, only 'pooled' is implemented.
        If ``pooled``, then the standard deviation of the samples is assumed
        to be the same. see CompareMeans.ztest_ind for different options.
    ddof : int
        Degrees of freedom use in the calculation of the variance of the mean
        estimate. In the case of comparing means this is one, however it can
        be adjusted for testing other statistics (proportion, correlation)

    Returns
    -------
    lower, upper : float or ndarray
        lower and upper limits of the confidence interval

    Raises
    ------
    NotImplementedError
        If ``usevar`` is not "pooled".

    Notes
    -----
    checked only for 1 sample case

    usevar not implemented, is always pooled in two sample case

    ``value`` shifts the confidence interval so it is centered at
    `x1_mean - x2_mean - value`

    See Also
    --------
    ztest
    CompareMeans

    """
    # mostly duplicate code from ztest

    if usevar != "pooled":
        raise NotImplementedError('only usevar="pooled" is implemented')

    sample1 = np.asarray(x1)
    n1 = sample1.shape[0]
    mean1 = sample1.mean(0)
    var1 = sample1.var(0)

    if x2 is None:
        # one sample case
        mean2 = 0
        var_diff = var1 / (n1 - ddof)
    else:
        sample2 = np.asarray(x2)
        n2 = sample2.shape[0]
        mean2 = sample2.mean(0)
        var2 = sample2.var(0)
        # sum of squared deviations of both samples, then scaled to the
        # variance of the difference in means
        var_diff = n1 * var1 + n2 * var2
        var_diff /= n1 + n2 - 2 * ddof
        var_diff *= 1.0 / n1 + 1.0 / n2

    center = mean1 - mean2 - value
    return _zconfint_generic(center, np.sqrt(var_diff), alpha, alternative)


def ztost(x1, low, upp, x2=None, usevar="pooled", ddof=1.0):
    """Equivalence test based on normal distribution

    Two one-sided z-tests are computed with ``ztest``, one against each
    threshold.

    Parameters
    ----------
    x1 : array_like
        one sample or first sample for 2 independent samples
    low, upp : float
        equivalence interval low < m1 - m2 < upp
    x2 : array_like or None
        second sample for 2 independent samples test. If None, then a
        one-sample test is performed.
    usevar : str
        Assumption about the standard deviations of the two samples. The
        option is passed on to ``ztest``.
    ddof : int
        Degrees of freedom correction, passed on to ``ztest``.

    Returns
    -------
    pvalue : float
        pvalue of the non-equivalence test
    t1, pv1 : tuple of floats
        test statistic and pvalue for lower threshold test
    t2, pv2 : tuple of floats
        test statistic and pvalue for upper threshold test

    Notes
    -----
    checked only for 1 sample case

    """

    def one_sided(alternative, threshold):
        # both tests share the data and options, only the direction and the
        # threshold differ
        return ztest(
            x1,
            x2,
            alternative=alternative,
            usevar=usevar,
            value=threshold,
            ddof=ddof,
        )

    res_low = one_sided("larger", low)
    res_upp = one_sided("smaller", upp)
    pvalue = np.maximum(res_low[1], res_upp[1])
    return pvalue, res_low, res_upp