
    M/Ph                       d Z ddlmZ ddlmZmZ ddlZddlZddlZ	ddl
mZmZ ddlmZmZ ddlmZ ddlmZmZmZmZ dd	lmZ dd
lmZ 	 ddlmZ n.# e$ r& ddl m!Z!m"Z"  edddg          Z# e#e!e"          ZY nw xY wdZ$d e$%                    dd          &                    d          D             Z' e	j(        e'dd                   )                    e*          Z+ e	j,        dd          Z-e+dddf         Z.e+dddddf         Z/e+dddddf         Z0ddZ1ddZ2d Z3d Z4d Z5d  Z6d! Z7d" Z8dd$Z9dd&Z:dd,Z;dd/Z<d0 Z= G d1 d2          Z> G d3 d4          Z? G d5 d6          Z@d7 ZAd8 ZBdd9ZCdd:ZDd; ZEd< ZFdd=ZGdd>ZHdd?ZId@ ZJdA ZKdB ZLdC ZMddFZN G dG dH          ZOdI ZPdJ ZQdK ZReSdLk    
rg dMZTdNeTv rB e	j(        g dOg          jU        e	jV        W                    dPdQ          z   ZX eY e4eX            dReTv sdSeTv rddTlZm[Z[  e[             dUeTv r} e;d'dVdWdWdXddYZ          Z\ e	j(        e\          ]                    d          Z^ eYe^            eYe^d         d[z  de^d         d\z  z
              eYe^dd]         e^d^d                    d_eTv rE e<d`da          Z_ eY e	j`        e_db                      eYe_a                    d                     dceTv r0 eY e1ddddde          df            eY e1dddddge          dh           dieTv r e	j(        g djdgdkdgdldgdldgdmdgdndgdndgdodgdpdgdndgdqdgdqdgdrdgdrdgdsdgdtdgdqdPgdudPgdvdPgdrdPgdsdPgdtdPgdwdPgdpd]gdpd]gdqd]gdxd]gdtd]gdyd]gdzd]gd{d]g          Zbd|  ecdd}          D             Zd ejA        ebdddf                   Zed~  ecdd}          D             Zf e	j(        d edD                       Zgd efD             Zhd efD             Ziejj        k                    d           ejj        l                    d            e	jm        eh          Zn e	jo        d]d          \  ZpZq eYd            eYeneq         enep         z
              e	jr        eneq         enep         z
            ddd         Zseneqes                  enepes                  z
    eYd            eteqes         epes                   D ]n\  ZuZv eYeuev ejw        edeu         edev                               ejx        edeu         edev         d.          \  ZyZz eYeyezdz  ezdz  dk     ezdz  dk                o e	j{        ebdddf         d          \  Z|Z} e	j~        e}          Z e	j~        e}ebdddf                   Zedz  ez  Zebdddf         r                                r                                Z e	j~        e}e          Zedz  ez  dz   Z eee}          ejA        ebdddf                   d            e>ebd          Z eYd            eYej                    eYee}                     e	j{        ebdddf         d          \  ZZ e> e	j        ebdddf         eg          d          Z e	j~        ee)                    e                    Zeedk             Z e* eee                    ZdedPz  ez
                                  edPz  ez
  z  z
  Z ee ej=        ee          d            eYd            eYe            eY e=ee                     ebj        d         ZdZeedz   z  dz  ed[edz
  z  z  z
  Zeedz   z  dz   ej=        ee          z  Z eYd            eteqes         epes                   D ]j\  ZuZv e	j        eheu         ehev         z
            Z e	j        e e	j        degeuevg         z            z            Z eYeueveeeez  eez  dk               k e@ebjU         Zew                                  e>ebd          Z eYd            ecd)          D ]Zue	jV        W                    dWd           e	j(        dd#g          z   jU        \  ZZ ejw        ee          Z e@e	j        eef         e	j         e	j         ee                     e	j         ee                    f                   Zew                                Z eYe e	j        ed                   ed         ez
  eed         z  dz
  d'z             e                    ej                  \  ZZZe                    ejx                  \  ZZZ eYd            eYe            eYd            eYe           e                    ejx        d          \  ZZZ eYd            eYe           deTv re	jV        W                    dd]           e	j(        g d          z   jU        Zd e6ed          \  ZZ e@ee          Ze                    ej                  \  ZZZ eYe           g dg dg dgZd e6ed          \  ZXZ e> e	j        eXeg                    Z eYe                                            e	j(        g d          Z e	j(        g d          Z eOeeddg          Zg dZ eY eedd%                      eY eedd.                      eedd.          Z eddged         dd         dP            edded                                                     eY eedd                      eYd eedd                      eMddPd           dS dS )a	  

from pystatsmodels mailinglist 20100524

Notes:
 - unfinished, unverified, but most parts seem to work in MonteCarlo
 - one example taken from lecture notes looks ok
 - needs cases with non-monotonic inequality for test to see difference between
   one-step, step-up and step-down procedures
 - FDR does not look really better then Bonferoni in the MC examples that I tried
update:
 - now tested against R, stats and multtest,
   I have all of their methods for p-value correction
 - getting Hommel was impossible until I found reference for pvalue correction
 - now, since I have p-values correction, some of the original tests (rej/norej)
   implementation is not really needed anymore. I think I keep it for reference.
   Test procedure for Hommel in development session log
 - I have not updated other functions and classes in here.
   - multtest has some good helper function according to docs
 - still need to update references, the real papers
 - fdr with estimated true hypothesis still missing
 - multiple comparison procedures incomplete or missing
 - I will get multiple comparison for now only for independent case, which might
   be conservative in correlated case (?).


some References:

Gibbons, Jean Dickinson and Chakraborti Subhabrata, 2003, Nonparametric Statistical
Inference, Fourth Edition, Marcel Dekker
    p.363: 10.4 THE KRUSKAL-WALLIS ONE-WAY ANOVA TEST AND MULTIPLE COMPARISONS
    p.367: multiple comparison for kruskal formula used in multicomp.kruskal

Sheskin, David J., 2004, Handbook of Parametric and Nonparametric Statistical
Procedures, 3rd ed., Chapman&Hall/CRC
    Test 21: The Single-Factor Between-Subjects Analysis of Variance
    Test 22: The Kruskal-Wallis One-Way Analysis of Variance by Ranks Test

Zwillinger, Daniel and Stephen Kokoska, 2000, CRC standard probability and
statistics tables and formulae, Chapman&Hall/CRC
    14.9 WILCOXON RANKSUM (MANN WHITNEY) TEST


S. Paul Wright, Adjusted P-Values for Simultaneous Inference, Biometrics
    Vol. 48, No. 4 (Dec., 1992), pp. 1005-1013, International Biometric Society
    Stable URL: http://www.jstor.org/stable/2532694
 (p-value correction for Hommel in appendix)

for multicomparison

new book "multiple comparison in R"
Hsu is a good reference but I do not have it.


Author: Josef Pktd and example from H Raja and rewrite from Vincent Davis


TODO
----
* name of function multipletests, rename to something like pvalue_correction?


    )
namedtuple)lziplrangeN)assert_almost_equalassert_equal)statsinterpolate)SimpleTable)multipletests_ecdffdrcorrectionfdrcorrection_twostage)utils)ValueWarning)studentized_range)qsturngpsturngr   ppfsf)r   r   a	  
  2     3     4     5     6     7     8     9     10
5   3.64 5.70   4.60 6.98   5.22 7.80   5.67 8.42   6.03 8.91   6.33 9.32   6.58 9.67   6.80 9.97   6.99 10.24
6   3.46 5.24   4.34 6.33   4.90 7.03   5.30 7.56   5.63 7.97   5.90 8.32   6.12 8.61   6.32 8.87   6.49 9.10
7   3.34 4.95   4.16 5.92   4.68 6.54   5.06 7.01   5.36 7.37   5.61 7.68   5.82 7.94   6.00 8.17   6.16 8.37
8   3.26 4.75   4.04 5.64   4.53 6.20   4.89 6.62   5.17 6.96   5.40 7.24       5.60 7.47   5.77 7.68   5.92 7.86
9   3.20 4.60   3.95 5.43   4.41 5.96   4.76 6.35   5.02 6.66   5.24 6.91       5.43 7.13   5.59 7.33   5.74 7.49
10  3.15 4.48   3.88 5.27   4.33 5.77   4.65 6.14   4.91 6.43   5.12 6.67       5.30 6.87   5.46 7.05   5.60 7.21
11  3.11 4.39   3.82 5.15   4.26 5.62   4.57 5.97   4.82 6.25   5.03 6.48 5.20 6.67   5.35 6.84   5.49 6.99
12  3.08 4.32   3.77 5.05   4.20 5.50   4.51 5.84   4.75 6.10   4.95 6.32 5.12 6.51   5.27 6.67   5.39 6.81
13  3.06 4.26   3.73 4.96   4.15 5.40   4.45 5.73   4.69 5.98   4.88 6.19 5.05 6.37   5.19 6.53   5.32 6.67
14  3.03 4.21   3.70 4.89   4.11 5.32   4.41 5.63   4.64 5.88   4.83 6.08 4.99 6.26   5.13 6.41   5.25 6.54
15  3.01 4.17   3.67 4.84   4.08 5.25   4.37 5.56   4.59 5.80   4.78 5.99 4.94 6.16   5.08 6.31   5.20 6.44
16  3.00 4.13   3.65 4.79   4.05 5.19   4.33 5.49   4.56 5.72   4.74 5.92 4.90 6.08   5.03 6.22   5.15 6.35
17  2.98 4.10   3.63 4.74   4.02 5.14   4.30 5.43   4.52 5.66   4.70 5.85 4.86 6.01   4.99 6.15   5.11 6.27
18  2.97 4.07   3.61 4.70   4.00 5.09   4.28 5.38   4.49 5.60   4.67 5.79 4.82 5.94   4.96 6.08   5.07 6.20
19  2.96 4.05   3.59 4.67   3.98 5.05   4.25 5.33   4.47 5.55   4.65 5.73 4.79 5.89   4.92 6.02   5.04 6.14
20  2.95 4.02   3.58 4.64   3.96 5.02   4.23 5.29   4.45 5.51   4.62 5.69 4.77 5.84   4.90 5.97   5.01 6.09
24  2.92 3.96   3.53 4.55   3.90 4.91   4.17 5.17   4.37 5.37   4.54 5.54 4.68 5.69   4.81 5.81   4.92 5.92
30  2.89 3.89   3.49 4.45   3.85 4.80   4.10 5.05   4.30 5.24   4.46 5.40 4.60 5.54   4.72 5.65   4.82 5.76
40  2.86 3.82   3.44 4.37   3.79 4.70   4.04 4.93   4.23 5.11   4.39 5.26 4.52 5.39   4.63 5.50   4.73 5.60
60  2.83 3.76   3.40 4.28   3.74 4.59   3.98 4.82   4.16 4.99   4.31 5.13 4.44 5.25   4.55 5.36   4.65 5.45
120   2.80 3.70   3.36 4.20   3.68 4.50   3.92 4.71   4.10 4.87   4.24 5.01 4.36 5.12   4.47 5.21   4.56 5.30
infinity  2.77 3.64   3.31 4.12   3.63 4.40   3.86 4.60   4.03 4.76   4.17 4.88   4.29 4.99   4.39 5.08   4.47 5.16
c                 6    g | ]}|                                 S  )split).0lines     c/var/www/html/test/jupyter/venv/lib/python3.11/site-packages/statsmodels/sandbox/stats/multicomp.py
<listcomp>r   t   s     MMMtzz||MMM    infinity9999
         皙?c                    |dk    r-t          j        t          t          dd| dz
  f                   }nB|dk    r-t          j        t          t          dd| dz
  f                   }nt          d           ||          S )0  
    return critical values for Tukey's HSD (Q)

    Parameters
    ----------
    k : int in {2, ..., 10}
        number of tests
    df : int
        degrees of freedom of error term
    alpha : {0.05, 0.01}
        type 1 error, 1-confidence level



    not enough error checking for limitations
    r%   Nr!   {Gz?z1only implemented for alpha equal to 0.01 and 0.05)r	   interp1dcrowscv005cv001
ValueError)kdfalphaintps       r   get_tukeyQcritr2   }   sz    " }}#E51Q3<88	$#E51Q3<88LMMM488Or   c                 4    t          j        d|z
  | |          S )r'   r$   )r   r   )r.   r/   r0   s      r   get_tukeyQcrit2r4      s    "  5!R000r   c                 .    t          j        || |          S )a  
    return adjusted p-values for Tukey's HSD

    Parameters
    ----------
    k : int in {2, ..., 10}
        number of tests
    df : int
        degrees of freedom of error term
    q : scalar, array_like; q >= 0
        quantile value of Studentized Range

    )r   r   )r.   r/   qs      r   get_tukey_pvaluer7      s     1b)))r   c                    t          j        |           }t          j        |          }t          j        |          }t          j        |           }t          j        |          }t          j        |          }t          j        |d          }	t          j        |d          }
t          j        |d          }|	dz  |
dz  z   |dz  z   }t          |           t          |          z   t          |          z   dz
  }||z  }t          j        |t          |           z            }t          |           t          |          z   t          |          z   dz
  }d}||z
  }d}t          d|d          }t          j        ||z
            |z  }t          j        ||z
            |z  }t          j        ||z
            |z  }g }t          |           t          |           t          |           ||k    r|
                    d           n|
                    d           ||k    r|
                    d	           n|
                    d
           ||k    r|
                    d           n|
                    d           |S )Nr!      r$         ?r%   r0   3to1null3to1alt3to2null3to2alt2to1null2to1alt)npmeanstdmathpowlensqrtr2   fabsprintappend)firstsecondthird	firstmean
secondmean	thirdmeanfirststd	secondstdthirdstdfirsts2seconds2thirds2
mserrornum
mserrordenmserrorstandarderrordftotaldfgroupsdferrorqcrit	qtest3to1	qtest3to2	qtest2to1
conclusions                           r   Tukeythreegenerd      sh    IJI ve}}HvIve}}H hx##Gx	1%%Hhx##G 1x!|+gk9Je**s6{{*SZZ71<J:%GIgE

233M
 %jj3v;;&U3a7GH GE1gT222E 9y011]BI9z122mCI:	122mCIJ 
)	)	) 5*%%%%)$$$5*%%%%)$$$5*%%%%)$$$r   c                    g }g }| D ]P}|                     t          j        |                     |                     t          j        |                     Qg }|D ]*}|                     t	          j        |d                     +t          |          dz  }t          | d                   t          | d                   z   t          | d                   z   dz
  }||z  }dS )z)gend is a list, ie [first, second, third]r!   r   r$   r9   N)rK   rB   rC   rD   rE   rF   sumrG   )	genesmeansstdsgenerD   stds2rX   rY   rZ   s	            r   Tukeythreegene2rl     s     ED ! !RWT]]###

26$<<     E & &TXc!__%%%% UAJeAh--E!H-c%(mm;Q>J#GGGr   c                     t          j        |           }t          j        d t          |           D                       }||fS )Nc                 \    g | ])\  }}|t          j        t          |                    z  *S r   )rB   onesrG   )r   r.   arrs      r   r   zcatstack.<locals>.<listcomp>(  s1    III#"'#c((+++IIIr   )rB   hstack	enumerate)argsxlabelss      r   catstackrv   &  s=    
	$AYII4IIIJJFf9r   c                    t          j        |           } | dd         dk     }| dd         dk    }t          j        ||z  | dd         dk    z            d         dz   }| d         dk    rt          |          }nd}||fS )aY  find all up zero crossings and return the index of the highest

    Not used anymore


    >>> np.random.seed(12345)
    >>> x = np.random.randn(8)
    >>> x
    array([-0.20470766,  0.47894334, -0.51943872, -0.5557303 ,  1.96578057,
            1.39340583,  0.09290788,  0.28174615])
    >>> maxzero(x)
    (4, array([1, 4]))


    no up-zero-crossing at end

    >>> np.random.seed(0)
    >>> x = np.random.randn(8)
    >>> x
    array([ 1.76405235,  0.40015721,  0.97873798,  2.2408932 ,  1.86755799,
           -0.97727788,  0.95008842, -0.15135721])
    >>> maxzero(x)
    (None, array([6]))
    Nr"   r   r$   rB   asarraynonzeromaxrt   cond1cond2allzerosmaxzs        r   maxzeror   .  s    2 	
1AcrcFQJEabbEAIEz55=QqrrUAX677:Q>Huqyy8}}>r   c                    t          j        |           } | dd         dk    }| dd         dk     }t          j        ||z  | dd         dk    z            d         dz   }| d         dk    rt          |          }nd}||fS )aT  find all up zero crossings and return the index of the highest

    Not used anymore

    >>> np.random.seed(12345)
    >>> x = np.random.randn(8)
    >>> x
    array([-0.20470766,  0.47894334, -0.51943872, -0.5557303 ,  1.96578057,
            1.39340583,  0.09290788,  0.28174615])
    >>> maxzero(x)
    (4, array([1, 4]))


    no up-zero-crossing at end

    >>> np.random.seed(0)
    >>> x = np.random.randn(8)
    >>> x
    array([ 1.76405235,  0.40015721,  0.97873798,  2.2408932 ,  1.86755799,
           -0.97727788,  0.95008842, -0.15135721])
    >>> maxzero(x)
    (None, array([6]))
Nr"   r   r$   rx   r|   s        r   maxzerodownr   R  s    0 	
1AcrcFQJEabbEAIEz55=QqrrUAX677:Q>Huqyy8}}>r   r:   c                 j    t          j        |           t          |           z  }||d|z
  z  |z   z  }|S )zireference line for rejection in multiple tests

    Not used anymore

    from: section 3.2, page 60
    r$   )rB   arangefloat)nr0   tfrejs       r   rejectionliner   w  s8     		!U1XXAqAeG}u$%DKr   indepc           	         t          j        |           } t          j        |           }| |         }t          |          }|dv r||z  }n|dv r@t          j        dt          j        dt          |                     z            }||z  |z  }nb|dv r||d|z
  z  |z   z  }nO|dv r<t          j        t          j        t          |                               }||z  |z  }nt          d          ||k    }|                                r(t          t          j
        |          d                   }	nd}	d	|d
|	<   ||                                         S )zReject False discovery rate correction for pvalues

    Old version, to be deleted


    missing: methods that estimate fraction of true hypotheses

    )ir   pposcorr)r   negcorr      ?r$   )gonegcorr)oth	o2negcorrzmethod not availabler   TN)rB   ry   argsortecdfrf   r   rG   r-   anyr{   rz   )
pvalsr0   methodpvals_sortindpvals_sortedpecdfrlinecmreject	rejectmaxs
             r   fdrcorrection_bakr     sh    JuE Ju%%M'LE///u$	#	#	#VBryCJJ///00u$r)	$	$	$ag 6 >?	'	'	'VBIc%jj))**u$b(/000e^Fzz|| 
6**1-..			F:I:-''))**r   d   2   
              c           
         ||z
  }t          j        dg|z  |g||z
  z  z             }g }	t          |           D ]}
|t          |||f          z   }t	          j        |d          \  }}t          t          j        |          |d          }t          t          j        |          |          }|		                    t          j
        |d|                   t          j
        ||d                   gt          j
        |d|                   t          j
        ||d                   gz   |                                z   t          j        |                                          z   t          j
        |d|         |k               t          j
        ||d         |k               gz   t          j
        |d|         ||z  k               t          j
        ||d         ||z  k               gz              t          j        |	          S )z%MonteCarlo to test fdrcorrection
    r   )sizer   r   r0   r   r;   N)rB   arrayrangerandmvnr   ttest_1sampr   absfdrcorrection0rK   rf   tolistsort)nreplnobsntestsntruemur0   rhonfalselocsresultsr   rvstttpvalresres0s                   r   mcfdrr     s    e^F8RDJ"v~!6677DG5\\ = =WSf~6666%c1--	EuU3GGGbfUmm5999s6E6{++RVCK-@-@AtFUF|,,bfT%&&\.B.BCDzz||$ wu~~,,../ uVeV}U233uUVV}U2335	5 uVeV}U6\9::uUVV}U6\9::<< 	= 	= 	= 	= 8Gr   r$   r!   Fc                     |\  }}d| k     rt| dk     rnt           j                            ||dz             }|ddddf         t          j        d| z
            z  |ddddf         t          j        |           z  z   }n| dk    r!t           j                            ||          }n| dk     r| d|dz
  z  k     rt	          d          | d|dz
  z  k    rd|dz
  dz   z  } | t          j        ||f          z  d| z
  t          j        |          z  z   }t          j        t           j                            ||          t           j        	                    |          j
                  }|rt          j        |          }|S )a  create random draws from equi-correlated multivariate normal distribution

    Parameters
    ----------
    rho : float
        correlation coefficient
    size : tuple of int
        size is interpreted (nobs, nvars) where each row

    Returns
    -------
    rvs : ndarray
        nobs by nvars where each row is a independent random draw of nvars-
        dimensional correlated rvs

    r   r$   Nr"   g      z'rho has to be larger than -1./(nvars-1)g|=)rB   randomrandnrH   r-   ro   eyedotlinalgcholeskyTr   zscore)r   r   standardizer   nvarsr   rvs2As           r   r   r     so   " KD%3ww377ioodE!G,,111SbS5zBGAcENN*S233Z"'#,,-FF	qytU++	qeAgFGGGCqM!!uQwu}%Cu&&&#rve}}'<<vbioodE22BI4F4Fq4I4I4KLL "|D!!Kr   c                     t          j        t          j        | t                              }||dk             }t	          t          |                     }d|dz  |z
                                  |dz  |z
  z  z
  }|S )z:

    should be equivalent of scipy.stats.tiecorrect

    dtyper$   r9   )rB   bincountry   intr   rG   rf   )xranksrankbincountntiesntottiecorrections        r   
tiecorrectr     sv     ;rz&s;;;<<L)*EVDE)..00$'D.AAMr   c                   <    e Zd ZdZd
dZddZddZd Zd Zd	 Z	dS )GroupsStatsa\  
    statistics by groups (another version)

    groupstats as a class with lazy evaluation (not yet - decorators are still
    missing)

    written this time as equivalent of scipy.stats.rankdata
    gs = GroupsStats(X, useranks=True)
    assert_almost_equal(gs.groupmeanfilter, stats.rankdata(X[:,0]), 15)

    TODO: incomplete doc strings

    FNc                 P   t          j        |          | _        |$t          j        |dddf         d          \  }}n |t          j        |dddf                   }|| _        || _        || _        t          j        |          x| _        }| 	                    |           dS )a  descriptive statistics by groups

        Parameters
        ----------
        x : ndarray, 2d
            first column data, second column group labels
        useranks : bool
            if true, then use ranks as data corresponding to the
            scipy.stats.rankdata definition (start at 1, ties get mean)
        uni, intlab : arrays (optional)
            to avoid call to unique, these can be given as inputs


        Nr$   Treturn_inverseuseranks)
rB   ry   rt   uniquer   uniintlabr   	groupnobsrunbasic)selfrt   r   r   r   r   s         r   __init__zGroupsStats.__init__  s     A>)AaaacF4@@@KC[)AaaacF##C  %'[%8%88 	x(((((r   c                 F   | j         }|r9|dddf                                                                         dz   | _        n|dddf         | _        t          j        | j        | j                  x| _        }|dz  | j        z  x| _        }|| j                 | _	        dS )runbasic_oldNr$   r   weightsr   )
rt   r   xxrB   r   r   groupsumr   	groupmeangroupmeanfilter)r   r   rt   groupranksumgrouprankmeans        r   r   zGroupsStats.runbasic_old-  s     F 	!fnn&&..0014DGG!fDG'){4;'P'P'PP *6);dn)LL,T[9r   c                    | j         }|rt          j        |dddf         d          \  }}|dddf                                                                         dz   }t	          t          j        ||g          d          j        | _        n|dddf         | _        t          j        | j	        | j                  x| _
        }|d	z  | j        z  x| _        }|| j	                 | _        dS )
r   Nr   Tr   r$   Fr   r   r   )rt   rB   r   r   r   column_stackr   r   r   r   r   r   r   )r   r   rt   xunixintlabranksrawr   r   s           r   r   zGroupsStats.runbasic?  s     F 	Ia!fTBBBMD'1v~~''//11A5H!"/8W2E"F"F+02 2 22A GG !fDG'){4;'P'P'PP *6);dn)LL,T[9r   c                      | j         | j        z
  S )groupdemean)r   r   r   s    r   r   zGroupsStats.groupdemeanT  s    w---r   c                 f    |                                  }t          j        | j        |dz            S )groupsswithinr!   r   )r   rB   r   r   )r   xtmps     r   r   zGroupsStats.groupsswithinX  s.    !!{4;a8888r   c                 @    |                                  | j        dz
  z  S )groupvarwithinr$   )r   r   r   s    r   r   zGroupsStats.groupvarwithin]  s     !!##T^A%566r   )FNNF)
__name__
__module____qualname____doc__r   r   r   r   r   r   r   r   r   r   r     s         ) ) ) )B: : : :$: : : :*. . .9 9 9
7 7 7 7 7r   r   c                   >    e Zd ZdZ	 	 	 d	dZd Zd Zd Z	 	 d
dZdS )TukeyHSDResultsaO  Results from Tukey HSD test, with additional plot methods

    Can also compute and plot additional post-hoc evaluations using this
    results class.

    Attributes
    ----------
    reject : array of boolean, True if we reject Null for group pair
    meandiffs : pairwise mean differences
    confint : confidence interval for pairwise mean differences
    std_pairs : standard deviation of pairwise mean differences
    q_crit : critical value of studentized range statistic at given alpha
    halfwidths : half widths of simultaneous confidence interval
    pvalues : adjusted p-values from the HSD test

    Notes
    -----
    halfwidths is only available after call to `plot_simultaneous`.

    Other attributes contain information about the data from the
    MultiComparison instance: data, df_total, groups, groupsunique, variance.
    Nc                    || _         || _        || _        || _        || _        || _        || _        || _        |	| _        |
| _	        || _
        | j         j        | _        | j         j        | _        | j         j        | _        d S N)
_multicomp_results_tableq_critr   	meandiffs	std_pairsconfintdf_totalreject2variancepvaluesdatagroupsgroupsunique)r   	mc_objectresults_tabler  r   r  r  r  r  r  r  r  s               r   r   zTukeyHSDResults.__init__x  s}     $+""  O(	o, O8r   c                 *    t          | j                  S r  )strr
  r   s    r   __str__zTukeyHSDResults.__str__  s    4&'''r   c                     | j         S )z*Summary table that can be printed
        )r
  r   s    r   summaryzTukeyHSDResults.summary  s     ""r   c                 z    t          | j        | j        | j        j        j        | j        j                  | _        dS )zKCompute simultaneous confidence intervals for comparison of means.
        N)simultaneous_cir  r  r	  
groupstatsr   pairindices
halfwidthsr   s    r   _simultaneous_ciz TukeyHSDResults._simultaneous_ci  s4     *$+t} O6@ O79 9r   r   r   c           	          t          j        |          \  }}||                    |           t           dd                                             j        j        j        g }g }	 fdt          t                              D             }
 fdt          t                              D             }|=|
                    t          t                               j        dddd           n| j        vrt          d	          t          j         j        |k              d
         d
         }t          t                              D ]|} j        |         |k    rt#          ||         ||                   t%          |
|         |
|                   z
  d
k     r|                    |           g|	                    |           }|
                    |         | j        |         dddd           |                    |
|         gdz  d j        j        gdd           |                    ||         gdz  d j        j        gdd           t          |          d
k    r-|
                    |         | j        |         dddd           t          |	          d
k    r-|
                    |	         |	 j        |	         dddd           |                    d           t          j        |          t          j        |
          z
  }|                    d j        j        g           |                    t          j        |
          |dz  z
  t          j        |          |dz  z   g           dg j                            t4                                                    z   dgz   }|                    t          j        dt                    dz                        |                    |           |                    ||nd           |                     ||nd           |S )a
  Plot a universal confidence interval of each group mean

        Visualize significant differences in a plot with one confidence
        interval per group instead of all pairwise confidence intervals.

        Parameters
        ----------
        comparison_name : str, optional
            if provided, plot_intervals will color code all groups that are
            significantly different from the comparison_name red, and will
            color code insignificant groups gray. Otherwise, all intervals will
            just be plotted in black.
        ax : matplotlib axis, optional
            An axis handle on which to attach the plot.
        figsize : tuple, optional
            tuple for the size of the figure generated
        xlabel : str, optional
            Name to be displayed on x axis
        ylabel : str, optional
            Name to be displayed on y axis

        Returns
        -------
        Figure
            handle to figure object containing interval plots

        Notes
        -----
        Multiple comparison tests are nice, but lack a good way to be
        visualized. If you have, say, 6 groups, showing a graph of the means
        between each group will require 15 confidence intervals.
        Instead, we can visualize inter-group differences with a single
        interval for each group mean. Hochberg et al. [1] first proposed this
        idea and used Tukey's Q critical value to compute the interval widths.
        Unlike plotting the differences in the means and their respective
        confidence intervals, any two pairs can be compared for significance
        by looking for overlap.

        References
        ----------
        .. [*] Hochberg, Y., and A. C. Tamhane. Multiple Comparison Procedures.
               Hoboken, NJ: John Wiley & Sons, 1987.

        Examples
        --------
        >>> from statsmodels.examples.try_tukey_hsd import cylinders, cyl_labels
        >>> from statsmodels.stats.multicomp import MultiComparison
        >>> cardata = MultiComparison(cylinders, cyl_labels)
        >>> results = cardata.tukeyhsd()
        >>> results.plot_simultaneous()
        <matplotlib.figure.Figure at 0x...>

        This example shows an example plot comparing significant differences
        in group means. Significant differences at the alpha=0.05 level can be
        identified by intervals that do not overlap (i.e. USA vs Japan,
        USA vs Germany).

        >>> results.plot_simultaneous(comparison_name="USA")
        <matplotlib.figure.Figure at 0x...>

        Optionally provide one of the group names to color code the plot to
        highlight group means different from comparison_name.
        Nr!  c                 <    g | ]}|         j         |         z
  S r   r!  r   r   rh   r   s     r   r   z5TukeyHSDResults.plot_simultaneous.<locals>.<listcomp>  )    MMMaE!Htq11MMMr   c                 <    g | ]}|         j         |         z   S r   r&  r'  s     r   r   z5TukeyHSDResults.plot_simultaneous.<locals>.<listcomp>  r(  r   oNoner.   )xerrmarker	linestylecolorecolorz)comparison_name not found in group names.r   br!   r"   z--z0.7)r.  r/  rz0.5z.Multiple Comparisons Between All Pairs (Tukey)g      $@ r$   )!r   create_mpl_axset_size_inchesgetattrr"  r	  r  r   r   rG   errorbarr   r!  r  r-   rB   whereminr{   rK   plotngroups	set_titleset_ylimset_xlimastyper  r   
set_yticksr   set_yticklabels
set_xlabel
set_ylabel)r   comparison_nameaxfigsizexlabelylabelfigax1sigidxnsigidxminrangemaxrangemidxr   r2  ylblsrh   s   `               @r   plot_simultaneousz!TukeyHSDResults.plot_simultaneous  s`   B &r**S(((4t,,4!!###*4 MMMMM5U;L;LMMMMMMMM5U;L;LMMM"LLs5zz 2 2 #vS  N N N N d&777 !LMMM8D->??B1ED3u::&& & &$Q'?::Xd^44),Xa[(4.)I)IJLMN NMM!$$$$NN1%%%%LLtd1F #vS  N N NHHhtn%a'"do.E)F#5  2 2 2HHhtn%a'"do.E)F#5  2 2 2 6{{QU6]F"&/&"9#'-S  F F F 7||aU7^W"&/'":3'-U5  J J J 	FGGGF8rvh///b$/12333bfX&&S0"&2B2BQW2LMNNNt(//44;;===DrySZZ!^44555E"""!3vv<<<!3vv<<<
r   )NNNNNNNN)NNr#  NN)	r  r  r  r  r   r  r  r"  rQ  r   r   r   r  r  a  s         , AEHL6:9 9 9 9(( ( (# # #9 9 9 HN.2x x x x x xr   r  c                   8    e Zd ZdZddZd ZddZdd
ZddZdS )MultiComparisona  Tests for multiple comparisons

    Parameters
    ----------
    data : ndarray
        independent data samples
    groups : ndarray
        group labels corresponding to each data point
    group_order : list[str], optional
        the desired order for the group mean results to be reported in. If
        not specified, results are reported in increasing order.
        If group_order does not contain all labels that are in groups, then
        only those observations are kept that have a label in group_order.

    Nc                 H    t          |          t          |          k    r.t          dt          |          t          |          fz            t          j        |           _        t          j        |          x _        }|%t          j        |d          \   _         _        n\|D ]}||vrt          d|z            t          j	        |           _        t          j
        t          |          t                     _         j                            d           d} j        D ]b}t          j         j        |k              d         }|t          |          z  }t          j         j        |k              d          j        |<   c| j        j        d         k    r`dd l}|                    dt"                      j        dk    }	 j        |	          _         j        |	          _         j        |	          _        t           j                  dk     rt          d	           fd
 j        D              _        t          j        t           j                  d           _         j        j        d          _        t           j                   _        d S )Nz&data has %d elements and groups has %dTr   z*group_order value '%s' not found in groupsir   z>group_order does not contain all groups: dropping observationsr!   z22 or more groups required for multiple comparisonsc                 <    g | ]}j         j        |k             S r   )r  r  )r   r.   r   s     r   r   z,MultiComparison.__init__.<locals>.<listcomp>P  s'    NNNqty!12NNNr   r$   )rG   r-   rB   ry   r  r  r   r  groupintlabr   emptyr   fillr8  shapewarningswarnr   datalitriu_indicesr   r   r;  )
r   r  r  group_ordergrpcountnameidxrZ  	mask_keeps
   `         r   r   zMultiComparison.__init__(  s   t99F##ETTWX^T_T_H``aaaJt$$	!z&111f 24)FKO3Q 3Q 3Q/Dt// # P Pf$$$H3NP P P % !# 5 5D!xD		377D!!$'''E) O Oht{d233A6S!(*1Bd1J(K(KA(N %%	***   78DF F F !,4	#'#3I#>  Ii0	"k)4t !!A%%QRRRNNNND<MNNN?3t/@+A+A1EEIOA&	4,--r   c                     t          t          j        | j        | j        g          d          | _        | j        j        | _        dS )zconvert data to rankdata and attach


        This creates rankdata as it is used for non-parametric tests, where
        in the case of ties the average rank is assigned.


        Tr   N)r   rB   r   r  rV  ranksr   rankdatar   s    r   getrankszMultiComparison.getranksV  sC     !$)T=M1N!O!O*.0 0 0

2r   r   c           
         |                                   | j        }| j        j        }| j        j        }||dz   z  dz  t          j        | j                  z  }t          d           t          | j
         D ]\  }}t          j        ||         ||         z
            }	t          j        |t          j        d|||g         z            z            }
|	|
z  }t          |||	|
|	|
z  |	|
z  dk               t          t
          j                            |          dz             t
          j                            |          dz  c S dS )z
        pairwise comparison for kruskal-wallis test

        This is just a reimplementation of scipy.stats.kruskal and does
        not yet use a multiple comparison correction.

        r         (@zMultiComparison.kruskal?5^I@r!   N)rg  r   re  r   r   r   r   rf  rJ   zipr   rB   r   rH   rf   normr   )r   pairsmultimethodtot	meanranksr   fr   jpdiffseQs               r   kruskalzMultiComparison.kruskalf  s?    	iJ(	J(	 #(c!U%5dm%D%D
D'(((() 		( 		(CAaF9Q<)A,677ERVBAa5)9$9;;;<<B
A !Aub%"*ebj6.ABBB%*--""Q&''':==##a''''		( 		(r   r%   bonfr$   c                 d   g }t          | j         D ]:\  }}|                     || j        |         | j        |                              ;t	          j        |          }t          |dd|f         ||          \  }}	}
}| j        \  }}|	t	          j        t          | j        |         | j        |         t	          j	        |dddf         d          t	          j	        |dddf         d          |          dt          fdt          fdt          fd	t          fd
t          j        fg          }nt	          j        t          | j        |         | j        |         t	          j	        |dddf         d          t	          j	        |dddf         d          t	          j	        |	d          |          dt          fdt          fdt          fd	t          fdt          fd
t          j        fg          }t          ||j        j                  }d|j        d||fz  d|
|fz  z   |_        ||||	|
|f|fS )a  run a pairwise test on all pairs with multiple test correction

        The statistical test given in testfunc is calculated for all pairs
        and the p-values are adjusted by methods in multipletests. The p-value
        correction is generic and based only on the p-values, and does not
        take any special structure of the hypotheses into account.

        Parameters
        ----------
        testfunc : function
            A test function for two (independent) samples. It is assumed that
            the return value on position pvalidx is the p-value.
        alpha : float
            familywise error rate
        method : str
            This specifies the method for the p-value correction. Any method
            of multipletests is possible.
        pvalidx : int (default: 1)
            position of the p-value in the return of testfunc

        Returns
        -------
        sumtab : SimpleTable instance
            summary table for printing

        errors:  TODO: check if this is still wrong, I think it's fixed.
        results from multipletests are in different order
        pval_corrected can be larger than 1 ???
        Nr   r      r$   group1group2statpvalr   r   	pval_corrheadersz.Test Multiple Comparison %s 
%s%4.2f method=%szFWER=z$
alphacSidak=%4.2f, alphacBonf=%5.3f)rk  r   rK   r\  rB   r   r   r   r  roundobjectr   bool_r
   r   namesr  title)r   testfuncr0   r   pvalidxr   r   rr  r   pvals_correctedalphacSidak
alphacBonfi1i2resarrr  s                   r   allpairtestzMultiComparison.allpairtest  sg   < () 	A 	ACAaJJxxAA??@@@@hsmmc!!!W*oU6JJJ 	9j !B"Xd4#4R#8$:KB:O"$(3qqqs8A"6"6"$(3qqqs8A"6"6"(* *  (0'0%en%en'2	4	5 5 5FF Xd4#4R#8$:KB:O"$(3qqqs8A"6"6"$(3qqqs8A"6"6"$(?1"="="(	* *
  (0'0%en%en*51'24
5 
5 
5F $FFL4FGGG= '5&9:3J'(( 	 sFO*J89?@ 	@r   c                 >   t          t          j        | j        | j        g          d          | _        | j        j        }| j        j        }t          j        | j        	                                t          |                    }t          |||d|d          }t          j        t          | j        |d         d                  | j        |d         d                  t          j        |d         d	          t          j        |d
         d	          t          j        |d	         dddf         d	          t          j        |d	         dddf         d	          |d                   dt           fdt           fdt"          fdt"          fdt"          fdt"          fdt          j        fg          }t'          ||j        j                  }dd|z  z   |_        t/          | ||d         |d         |d         |d         |d	         |d         |d         ||d
                   S )at  
        Tukey's range test to compare means of all pairs of groups

        Parameters
        ----------
        alpha : float, optional
            Value of FWER at which to calculate HSD.

        Returns
        -------
        results : TukeyHSDResults instance
            A results class containing relevant data and some post-hoc
            calculations
        Fr   )ddofN)r/   r0   r  r   r$   r!   ry     rz  r{  meandiffzp-adjlowerupperr   r   r  z*Multiple Comparison of Means - Tukey HSD, z
FWER=%4.2f   r9   r      )r   rB   r   r  rV  r  r   r   varr   rG   tukeyhsdr   r   r  r  r  r   r  r
   r   r  r  r  )r   r0   gmeansgnobsvar_r   r  r  s           r   r  zMultiComparison.tukeyhsd  s    &OTY(89::   *) vdo1133#f++FFF vudt5NNN$t0Q;#0Q;!xA22!xA22!xAqqq!ta88!xAqqq!ta88"1v' ' #+F!3"*F!3",e!4")5!1")5!1")5!1"*BH!5!78 8 8 $FFL4FGGGJ*U23 t]CFCFCF"1vs1vs1vs1vtSVM M 	Mr   r  )Nr   )r%   rw  r$   r%   )	r  r  r  r  r   rg  rv  r  r  r   r   r   rS  rS    s          +. +. +. +.\3 3 3 ( ( ( (:E@ E@ E@ E@N/M /M /M /M /M /Mr   rS  c                 |   t          j        | dddf         d          \  }}t          j        |          }t          j        |t          dddf                   }|dz  |z  }| dddf                                                                         }t          j        ||          }|dz  |z  dz   }||         S )zvrankdata, equivalent to scipy.stats.rankdata

    just a different implementation, I have not yet compared speed

    Nr   Tr   r   r   r$   )rB   r   r   Xr   )	rt   r   r   r   	groupxsum
groupxmeanrankrawr   r   s	            r   rf  rf    s     )AaaacF4888KCF##IFAaaacF333IS9,J!fnn&&((G;vw777L 3&2Q6M  r   c                 L   t          j        |           } |}t          j        |           }| |         }|                                }t          |           }t          j        |d          \  }}t          d          D ]&}	t          d|	d          D ]}
t          |	|
           'dS )zsimple ordered sequential comparison of means

    vals : array_like
        means or rankmeans for independent groups

    incomplete, no return, not used yet
    r$   ry  r"   N)rB   ry   r   rG   r]  r   rJ   )valsr0   alphafsortindr   
sortrevindr   v1v2r   rr  s              r   compare_orderedr    s     :dDFjGME""JYYF _VQ''FB1XX  qB 	 	A!AJJJJ	 r   c                     t          j        |           } |sd| z                                  S d| z                                  t          |           z  S )aO  correction factor for variance with unequal sample sizes

    this is just a harmonic mean

    Parameters
    ----------
    nobs_all : array_like
        The number of observations for each sample
    srange : bool
        if true, then the correction is divided by the number of samples
        for the variance of the studentized range statistic

    Returns
    -------
    correction : float
        Correction factor for variance.


    Notes
    -----

    variance correction factor is

    1/k * sum_i 1/n_i

    where k is the number of samples and summation is over i=0,...,k-1.
    If all n_i are the same, then the correction factor is 1.

    This needs to be multiplied by the joint variance estimate, means square
    error, MSE. To obtain the correction factor for the standard deviation,
    square root needs to be taken.

    r   )rB   ry   rf   rG   )nobs_allsranges     r   varcorrection_unbalancedr  (  sS    D z(##H 18  """8  ""3x==00r   c                 h    t          j        | |           \  }}|sd|z  d|z  z   S d|z  d|z  z   dz  S )a  correction factor for variance with unequal sample sizes for all pairs

    this is just a harmonic mean

    Parameters
    ----------
    nobs_all : array_like
        The number of observations for each sample
    srange : bool
        if true, then the correction is divided by 2 for the variance of
        the studentized range statistic

    Returns
    -------
    correction : ndarray
        Correction factor for variance.


    Notes
    -----

    variance correction factor is

    1/k * sum_i 1/n_i

    where k is the number of samples and summation is over i=0,...,k-1.
    If all n_i are the same, then the correction factor is 1.

    This needs to be multiplies by the joint variance estimate, means square
    error, MSE. To obtain the correction factor for the standard deviation,
    square root needs to be taken.

    For the studentized range statistic, the resulting factor has to be
    divided by 2.

    r          @rB   meshgrid)r  r  n1n2s       r   varcorrection_pairs_unbalancedr  P  sK    L [8,,FB $2222##r   c                     t          j        |           } | dz  |z  }|                                }|dz  |dz  |z                                  z  }||fS )a  return joint variance from samples with unequal variances and unequal
    sample sizes

    something is wrong

    Parameters
    ----------
    var_all : array_like
        The variance for each sample
    nobs_all : array_like
        The number of observations for each sample
    df_all : array_like
        degrees of freedom for each sample

    Returns
    -------
    varjoint : float
        joint variance.
    dfjoint : float
        joint Satterthwait's degrees of freedom


    Notes
    -----
    (copy, paste not correct)
    variance is

    1/k * sum_i 1/n_i

    where k is the number of samples and summation is over i=0,...,k-1.
    If all n_i are the same, then the correction factor is 1/n.

    This needs to be multiplies by the joint variance estimate, means square
    error, MSE. To obtain the correction factor for the standard deviation,
    square root needs to be taken.

    This is for variance of mean difference not of studentized range.
    r   r!   )rB   ry   rf   )var_allr  df_all
var_over_nvarjointdfjoints         r   varcorrection_unequalr  |  s_    P j!!G"h&J~~HkZ]V388:::GWr   c                     t          j        | |           \  }}t          j        ||          \  }}t          j        ||          \  }}||z  ||z  z   }	|	dz  |||z  dz  z  |||z  dz  z  z   z  }
|	|
fS )a  return joint variance from samples with unequal variances and unequal
    sample sizes for all pairs

    something is wrong

    Parameters
    ----------
    var_all : array_like
        The variance for each sample
    nobs_all : array_like
        The number of observations for each sample
    df_all : array_like
        degrees of freedom for each sample

    Returns
    -------
    varjoint : ndarray
        joint variance.
    dfjoint : ndarray
        joint Satterthwait's degrees of freedom


    Notes
    -----

    (copy, paste not correct)
    variance is

    1/k * sum_i 1/n_i

    where k is the number of samples and summation is over i=0,...,k-1.
    If all n_i are the same, then the correction factor is 1.

    This needs to be multiplies by the joint variance estimate, means square
    error, MSE. To obtain the correction factor for the standard deviation,
    square root needs to be taken.

    TODO: something looks wrong with dfjoint, is formula from SPSS
    r!   r  )r  r  r  r  r  r  r  df1df2r  r  s              r   varcorrection_pairs_unequalr    s    R ['**FB[8,,FB{66**HC"ur"u}HkSBrEA:-r"uqj0@@AGWr   c           	      Z   t          j        |           } t          |           }||dz
  }t          j        |          dk    r||z  }t          j        |          |z  }nt          j        |          }t          j        |          dk    r8t          j        |          dk    r d|z  |z  t          j        ||f          z  }nnt          j        |          dk    r|t          |d          z  }nAt          j        |          dk    rt          |||          \  }}	|dz  }nt          d          | | dddf         z
  }
t          j	        |          }t          j
        |d          \  }}|
||f         }|||f         }t          j        |          |z  }t          |d          }|t          |||	          }t          |||          }t          j        |          }||k    }||z  }t          j        |          |k    }t          j        ||z
  ||z   f          }||f||||||||f	S )
a  simultaneous Tukey HSD


    check: instead of sorting, I use absolute value of pairwise differences
    in means. That's irrelevant for the test, but maybe reporting actual
    differences would be better.
    CHANGED: meandiffs are with sign, studentized range uses abs

    q_crit added for testing

    TODO: error in variance calculation when nobs_all is scalar, missing 1/n

    Nr$   r   Tr  r  not supposed to be herer  r;   )rB   ry   rG   r   ro   rf   r  r  r-   rH   r]  r   r{   r4   r7   
atleast_1dr   )mean_allr  r  r/   r0   r  n_meansr  	var_pairsdf_sum
meandiffs_
std_pairs_idx1idx2r  r  st_range	df_total_r  r   crit_intr  r  s                          r   r  r    s\    z(##H (mmG	z\	wr{{aR<WW"6"::
QRWW%5%5%:%:L8+bgw6H.I.II					Q		<XDHJ J J J					A		7'2NN	6R		 2333 HQQQtV,,J##J !,,JD$4:&I4:&Ivi  9,HHa  I~ (%@@@w(;;GmG$$GF6!HfY(*Goy83Y5IJKKG4L&)Ygw( (r   c                    t          |          }|t          j        |d          }||z  }t          j        ||d                  ||d                  z             }t          j        ||f          }|||<   ||                                j        z   }t          j        |          }t          j        |d          }	|dk    r|dz
  |	z  |z
  |dz
  |dz
  z  z  }
n|t          j        d          z  dz  }
| t          j        d          z  |
z  S )	a  Compute simultaneous confidence intervals for comparison of means.

    q_crit value is generated from tukey hsd test. Variance is considered
    across all groups. Returned halfwidths can be thought of as uncertainty
    intervals around each group mean. They allow for simultaneous
    comparison of pairwise significance among any pairs (by checking for
    overlap)

    Parameters
    ----------
    q_crit : float
        The Q critical value studentized range statistic from Tukey's HSD
    var : float
        The group variance
    groupnobs : array_like object
        Number of observations contained in each group.
    pairindices : tuple of lists, optional
        Indices corresponding to the upper triangle of matrix. Computed
        here if not supplied

    Returns
    -------
    halfwidths : ndarray
        Half the width of each confidence interval for each group given in
        groupnobs

    See Also
    --------
    MultiComparison : statistics class providing significance tests
    tukeyhsd : among other things, computes q_crit value

    References
    ----------
    .. [*] Hochberg, Y., and A. C. Tamhane. Multiple Comparison Procedures.
           Hoboken, NJ: John Wiley & Sons, 1987.)
    Nr$   r   )axisr!   r   r  )r!   r$   )	rG   rB   r]  rH   zerosconjr   rf   ro   )r  r  r   r   nggvard12dsum1sum2ws              r   r  r  (  s
   L 
YBob!,, ?D
'${1~&k!n)==
>
>C 	"bAAkN	AFFHHJA 6#;;D6!!D
Qet^d"RBG'<=276??"R'RWQZZ""r   c                 *   t          j        |           } t          |           }||dz
  }t          j        |          dk    r||z  }nt          j        |          }t          j        |          dk    r8t          j        |          dk    r d|z  |z  t          j        ||f          z  }nnt          j        |          dk    r|t          |d          z  }nAt          j        |          dk    rt          |||          \  }}|dz  }nt          d          | | dddf         z
  }	t          j	        |          }
t          j
        |d          \  }}|rt          ||f         }	t          ||f         }
t          j        |	          |
z  }||	|
||ffS )zpairwise distance matrix, outsourced from tukeyhsd



    CHANGED: meandiffs are with sign, studentized range uses abs

    q_crit added for testing

    TODO: error in variance calculation when nobs_all is scalar, missing 1/n

    Nr$   r   Tr  r  r  )rB   ry   rG   r   rf   ro   r  r  r-   rH   r]  r  r  r   )r  r  r  r/   triur  r  r  r  r  r  r  r  r  s                 r   distance_st_ranger  g  s    z(##H (mmG	z\	wr{{aR<6"::
QRWW%5%5%:%:L8+bgw6H.I.II					Q		<XDHJ J J J					A		7'2NN	6R		 2333 8AAAdF++I	""I!,,JD$ +tTz*	tTz*	vi  9,HY	D;66r   c                     g }t          |           D ]K}t          |dz   |           D ]5}t          j        |           }d||<   d||<   |                    |           6Lt          j        |          S )zcontrast or restriction matrix for all pairs of nm variables

    Parameters
    ----------
    nm : int

    Returns
    -------
    contr : ndarray, 2d, (nm*(nm-1)/2, nm)
       contrast matrix for all pairwise comparisons

    r$   r"   )r   rB   r  rK   r   )nmcontrr   rr  	contr_rows        r   contrast_allpairsr    s     E2YY $ $qsB 	$ 	$AIIaLIaLLL####		$
 8E??r   c                     t          j        t          j        | dz
            t          j        | dz
             f          }|S )zcontrast or restriction matrix for all against first comparison

    Parameters
    ----------
    nm : int

    Returns
    -------
    contr : ndarray, 2d, (nm-1, nm)
       contrast matrix for all against first comparisons

    r$   )rB   r   ro   r   )r  r  s     r   contrast_all_oner    s7     ORWRT]]RVBqD\\M:;;ELr   c                 ^    t          j        |           t          j        | | f          | z  z
  S )zcontrast or restriction matrix for all against mean comparison

    Parameters
    ----------
    nm : int

    Returns
    -------
    contr : ndarray, 2d, (nm-1, nm)
       contrast matrix for all against mean comparisons

    )rB   r   ro   )r  s    r   contrast_diff_meanr    s*     6"::B((+++r   c                     t          |          }t          j        ||j                  dz  }| t          j        d          z  t          j        |j        d                   z  }t          |||          S )Nr  r!   r   r/   )r  rB   r   r   rH   ro   rY  multicontrast_pvalues)	std_ranger  r/   r  corrtstats         r   tukey_pvaluesr    se     b!!E6%!!"$D

"RWTZ]%;%;;E 4444r   r   	two-sidedc           
         ddl m} ||dk    rt          d          t          j        |           } t          |           }t          j        |           }d || |||          z
  }g }	|D ]>}
|
t          j        |          z  }|	                    d || |||          z
             ?|t          j        |	          fS )z$pvalues for simultaneous tests

    r   )
mvstdtprobNr   z-df has to be specified for the t-distributionr$   )	.statsmodels.sandbox.distributions.multivariater  r-   rB   ry   rG   r   ro   rK   )r  tcorrr/   distalternativer  r   ccpval_globalr   tilimitss               r   r  r    s     JIIIII

HIIIJuEZZF	Bjj"R333KE 8 8BGFOO#QRCE26667777
5))))r   c                   >    e Zd ZdZd
dZd Zd Zd Zd Zd Z	d	 Z
dS )StepDowna0  a class for step down methods

    This is currently for simple tree subset descend, similar to homogeneous_subsets,
    but checks all leave-one-out subsets instead of assuming an ordered set.
    Comment in SAS manual:
    SAS only uses interval subsets of the sorted list, which is sufficient for range
    tests (maybe also equal variance and balanced sample sizes are required).
    For F-test based critical distances, the restriction to intervals is not sufficient.

    This version uses a single critical value of the studentized range distribution
    for all comparisons, and is therefore a step-down version of Tukey HSD.
    The class is written so it can be subclassed, where the get_distance_matrix and
    get_crit are overwritten to obtain other step-down procedures such as REGW.

    iter_subsets can be overwritten, to get a recursion as in the many to one comparison
    with a control such as in Dunnet's test.


    A one-sided right tail test is not covered because the direction of the inequality
    is hard coded in check_set.  Also Peritz's check of partitions is not possible, but
    I have not seen it mentioned in any more recent references.
    I have only partially read the step-down procedure for closed tests by Westfall.

    One change to make it more flexible, is to separate out the decision on a subset,
    also because the F-based tests, FREGW in SPSS, take information from all elements of
    a set and not just pairwise comparisons. I have not looked at the details of
    the F-based tests such as Sheffe yet. It looks like running an F-test on equality
    of means in each subset. This would also outsource how pairwise conditions are
    combined, any larger or max. This would also imply that the distance matrix cannot
    be calculated in advance for tests like the F-based ones.


    Nc                 f    || _         t          |          | _        || _        || _        || _        d S r  )r  rG   n_valsr  r  r/   )r   r  r  r  r/   s        r   r   zStepDown.__init__  s/    	$ii r   c                 r    t          | j        | j        |          }|t          j        | j                  z  S )zG
        get_tukeyQcrit

        currently tukey Q, add others
        r;   )r2   r  r/   rB   ro   )r   r0   r  s      r   get_critzStepDown.get_crit!  s3      TWEBBB,,,,r   c                 n    t          | j        | j        | j        | j                  }|d         | _        dS )zstudentized range statisticr  r   N)r  r  r  r  r/   distance_matrix)r   dress     r   get_distance_matrixzStepDown.get_distance_matrix,  s4     !DM4<DGTTT#Awr   c              #      K   t          t          |                    D ]/}t          j        |          }|                    |           |V  0dS )zIterate substepsN)r   rG   copypop)r   indicesiiidxsubs       r   iter_subsetszStepDown.iter_subsets2  sV      G%% 	 	BYw''FJJrNNNLLLL	 	r   c                 .   t          |          }|| j        v r| j        |         S | j        t          j        |          dddf         |f         }t          |          }t          j        || j        |dz
           k              rd}nd}|| j        |<   |S )zGcheck whether pairwise distances of indices satisfy condition

        Nr$   TF)tuplecache_resultr  rB   ry   rG   r   crit)r   r  indtupset_distance_matrix
n_elementsr   s         r   	check_setzStepDown.check_set:  s     wT&&&$V,,"&"6rz'7J7J111T67RT[7["\WJv)DIjl,CCDD (+Df%Jr   c                 r   t          |           |                     |          rkt          |          dk    r/|                     |          D ]}|                     |           dS | j                            t          |                     dS | j                            t          |                     |S )stepdownr!   N)	rJ   r  rG   r  r  rejectedrK   r
  accepted)r   r  subss      r   r  zStepDown.stepdownK  s    g>>'"" 	Gq   --g66 ( (DMM$''''( ( $$U7^^44444M  w000Nr   c                 d   i | _         |                     |          | _        g | _        g | _        |                                  |                     t          | j                             t          t          | j                            t          t          t          j                            fS )zmain function to run the test,

        could be done in __call__ instead
        this could have all the initialization code

        )r  r  r  r  r  r  r  r   r  listsetsd)r   r0   s     r   runzStepDown.runX  s     MM%((	  """fT[))***C&&''c"+.>.>)?)???r   r  )r  r  r  r  r   r  r  r  r  r  r  r   r   r   r  r    s           D   - - -' ' '    "  @ @ @ @ @r   r  c                   	
 t          |           t                    }g g 
t          j                  dk    rt          j        f          z  	
fd	 	| |          }fdt                    D             }t                    }t          t          |          |z
            }t          |          |t          t          
                    |fS )aQ  recursively check all pairs of vals for minimum distance

    step down method as in Newman-Keuls and Ryan procedures. This is not a
    closed procedure since not all partitions are checked.

    Parameters
    ----------
    vals : array_like
        values that are pairwise compared
    dcrit : array_like or float
        critical distance for rejecting, either float, or 2-dimensional array
        with distances on the upper triangle.

    Returns
    -------
    rejs : list of pairs
        list of pair-indices with (strictly) larger than critical difference
    nrejs : list of pairs
        list of pair-indices with smaller than critical difference
    lli : list of tuples
        list of subsets with smaller than critical difference
    res : tree
        result of all comparisons (for checking)


    this follows description in SPSS notes on Post-Hoc Tests

    Because of the recursive structure, some comparisons are made several
    times, but only unique pairs or sets are returned.

    Examples
    --------
    >>> m = [0, 2, 2.5, 3, 6, 8, 9, 9.5,10 ]
    >>> rej, nrej, ssli, res = homogeneous_subsets(m, 2)
    >>> set_partition(ssli)
    ([(5, 6, 7, 8), (1, 2, 3), (4,)], [0])
    >>> [np.array(m)[list(pp)] for pp in set_partition(ssli)[0]]
    [array([  8. ,   9. ,   9.5,  10. ]), array([ 2. ,  2.5,  3. ]), array([ 6.])]


    r$   c                 z   |d         |d         }}| d         | d         z
  ||f         k    rj                     |d         |d         f            | dd         |dd                    | dd         |dd                   |d         |d         fgS                      t          |                     |S )zwrecursive function for constructing homogeneous subset

        registers rejected and subsetli in outer scope
        r   r"   Nr$   )rK   r
  )r  indices_r   rr  dcritr  subsets	subsetslis       r   r  z$homogeneous_subsets.<locals>.subsets  s    
 Xb\18d1gac
**OOXa[(2,7888GD"Ix}55GDHhqrrl33a[(2,/1 1 U8__---Or   c                 F    g | ]}t          d z
  |d          D ]}||fS )r$   r"   )r   )r   r   rr  nvalss      r   r   z'homogeneous_subsets.<locals>.<listcomp>  s:    JJJ1eE!GAb6I6IJJ!AJJJJr   )rG   r   rB   r   ro   r   r  r  )r  r  r  r   	all_pairsrejsnot_rejectedr"  r  r  r   s    `     @@@@r   homogeneous_subsetsr&  m  s    V IIEe}}HHI	wu~~bguen---        '$
!
!CJJJJeJJJIx==DI-..L::|T#i..%9%93>>r   c                 v   g }t          t          t          |                     t                    ddd         D ]S}t          |                                          t          fd|D                       s|                    |           Tt          d | D             d |D             z
            }||fS )a  extract a partition from a list of tuples

    this should be correctly called select largest disjoint sets.
    Begun and Gabriel 1981 do not seem to be bothered by sets of accepted
    hypothesis with joint elements,
    e.g. maximal_accepted_sets = { {1,2,3}, {2,3,4} }

    This creates a set partition from a list of sets given as tuples.
    It tries to find the partition with the largest sets. That is, sets are
    included after being sorted by length.

    If the list does not include the singletons, then it will be only a
    partial partition. Missing items are singletons (I think).

    Examples
    --------
    >>> li
    [(5, 6, 7, 8), (1, 2, 3), (4, 5), (0, 1)]
    >>> set_partition(li)
    ([(5, 6, 7, 8), (1, 2, 3)], [0, 4])

    keyNr"   c              3   v   K   | ]3}t                                        t          |                    V  4d S r  )r  intersection)r   r   s_s     r   	<genexpr>z set_partition.<locals>.<genexpr>  s=      >>A3r77''A//>>>>>>r   c                     h | ]	}|D ]}|
S r   r   r   llr   s      r   	<setcomp>z set_partition.<locals>.<setcomp>  s%    111"b11A1111r   c                     h | ]	}|D ]}|
S r   r   r/  s      r   r1  z set_partition.<locals>.<setcomp>  s%    333B331a3333r   )sortedr  r  rG   r  r   rK   )sslipartsmissingr,  s       @r   set_partitionr8    s    . DDTOO---ddd3  VV[[]]>>>>>>>>> 	KKNNN 1111133$3334 5 5G=r   c                     g }t          t          t          |                     d           ddd         D ]2t          fd|D                       s|                               3|S )aK  remove sets that are subsets of another set from a list of tuples

    Parameters
    ----------
    ssli : list of tuples
        each tuple is considered as a set

    Returns
    -------
    part : list of tuples
        new list with subset tuples removed, it is sorted by set-length of tuples. The
        list contains original tuples, duplicate elements are not removed.

    Examples
    --------
    >>> set_remove_subs([(0, 1), (1, 2), (1, 2, 3), (0,)])
    [(1, 2, 3), (0, 1)]
    >>> set_remove_subs([(0, 1), (1, 2), (1,1, 1, 2, 3), (0,)])
    [(1, 1, 1, 2, 3), (0, 1)]

    c                 :    t          t          |                     S r  )rG   r  )rt   s    r   <lambda>z!set_remove_subs.<locals>.<lambda>  s    3s1vv;; r   r(  Nr"   c              3   v   K   | ]3}t                                        t          |                    V  4d S r  )r  issubset)r   r   r6  s     r   r-  z"set_remove_subs.<locals>.<genexpr>  s;      99q3q66??3q66**999999r   )r3  r  r  r   rK   )r4  r5  r6  s     @r   set_remove_subsr>    s    . DDTOO)>)>???"E   9999D99999 	KKNNN
 Kr   __main__)tukey	tukeycritfdrfdrmcrw  r   multicompdevr+  r@  )r   r   r$   r9      rB  rw  )example_fdr_bonferronirC  i     g?g333333?)r   r   r   r   r   r0   r   g      @g      @ry  r   g?)i  r  )rowvarrA  r  r;   gffffff@r(   gzG@rD  gQ@g(\@g@gzG@gQ@g
ףp=
@gףp=
@g(\@gQ@g333333@g=
ףp=@g      @gGz@g\(\@g)\(@gffffff@g{Gz@gp=
ף@c                 R    g | ]$}t           t           d d df         |k    df         %S )Nr$   r   )r  r   r.   s     r   r   r   -  s0    222!q111Q31~222r   r  c                 N    g | ]"}t           t          d d df         |k             #S )Nr$   )r   r  rK  s     r   r   r   /  s-    :::!F1QQQqS619%:::r   c                 ,    g | ]}t          |          S r   )rG   )r   xvals     r   r   r   0  s    444#d))444r   c                 6    g | ]}|                                 S r   )rC   r   items     r   r   r   1  s     666TTYY[[666r   c                 6    g | ]}|                                 S r   )rf   rP  s     r   r   r   2  s     4444DHHJJ444r   gIRk?g      ?z
sorted rank differencesz
kruskal for all pairs)use_continuityg?g?Tr   r   r      r   z#
groupmeanfilter and grouprankmeansz!
tiecorrection for data and ranksi  ri  z
pairs of mean rank differencesrj  z%
examples for kruskal multicomparisonr3  hs)r   last<   )r   r   r:   r:   )r  r   	   r   rX  )r  r  r  r  r  )ry  r  r  r  r  )g{G@gQ@g9#@gR1@)r  r  r  r  gMbP?   )g-C6?g-C6:?gŏ1w-!_?g~jt?g0*?gPkw?g2%䃞?g"u?g9m4?g#~j?gfc]F?g:M?gZӼ?g}?5^I?r   r   )r0   itergu!Va?gQI?fdr_gbsgx&1@   r  )r:   )r%   r   )r   r   r   r   r:   r%   r   )r   Fr   )Nr%   Nr  )NF)Nr   r  )r  collectionsr   statsmodels.compat.pythonr   r   r  rE   numpyrB   numpy.testingr   r   scipyr   r	   statsmodels.iolib.tabler
   statsmodels.stats.multitestr   r   r   r   r   r   statsmodels.graphicsr   statsmodels.tools.sm_exceptionsr   scipy.statsr   ImportErrorstatsmodels.stats.libqsturngr   r   studentized_range_tupler_   replacer   r   r   r?  r   cr   ccolsr*   r+   r,   r2   r4   r7   rd   rl   rv   r   r   r   r   r   r   r   r   r  rS  rf  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r&  r8  r>  r  examplesr   r   r   rt   rJ   ex_multicomprF  mcresrC   mcmeansrvsmvncorrcoefr  r  r   xlir   xrankslixnobsrp  sumranksrl  r   isfr   mrsr]  r  r  r   diffidxrk  r   rr  rv  mannwhitneyumwumwupvalr   r   r   r   r   r  r  r  r   r   gsr   r   r   r   gs2r   r   r   rG   r   rf   r   rY  ro  r   rq  r   rs  rH   rt  	multicompgsrx1x2skwr_r  ro   mc2newskwr  	ttest_indtablettresttarrtttablemwresmwarrmw	tablemwhsxrvsxrvsgr
multicomprru   gs4r   r  r  r  r   res_tstr   r   r   <module>r     su  > >~ # " " " " " 2 2 2 2 2 2 2 2       ; ; ; ; ; ; ; ; $ $ $ $ $ $ $ $ / / / / / / } } } } } } } } } } } } & & & & & & 8 8 8 8 8 8I------- I I I========(j)<udmLL//GHHHI	4 NMj @ @ F Ft L LMMM
"(3qt9U##	!B	!!!A#	!!!QTT'
	!!!QTT'
   21 1 1 1(* * *"C C CN!$ !$ !$H  " " "H! ! !J	 	 	 	("+ "+ "+ "+H   .! ! ! !R  b7 b7 b7 b7 b7 b7 b7 b7Hs s s s s s s slbM bM bM bM bM bM bM bMJ! ! !(  0&1 &1 &1 &1P*$ *$ *$ *$X. . .`1 1 1fF( F( F( F(R=# =# =# =#~57 57 57 57p  ,   , , ,5 5 5* * * *,s@ s@ s@ s@ s@ s@ s@ s@tF? F? F?P" " "J" " "J z( ( (H (BHgggY!BIOOAr$:$::nna !!! 	v11888888   (3d2RCtY\]]]"(5//&&q))ggajmQwqz"}_---gbqbk7233<((( Hh''kbk&+++,,,fjjmm hnnQ...555nnQ...555 !! BH !tQi !$ !T1I !ay !4) !Qi!"&!-11I!8<ay!CG)!Qi!"&!-11I!8<ay!CG)! Qi! #'! .21I! 9=ay! DH)! Qi	! #'	! .21I	! 9=ay	! DH)	!
 Qi!
 #'!
 .21I!
 9=ay!
 DH)! Qi! " " 32uuQqzz222!!!A#''::uuQqzz:::444445566X666	448444 	
)***
tbgi   1%%B)***c"gB   "*SWs2w.//"5BwK3r'{+++'(((3r'{BwK00 	G 	GCAaE!A}u}SVSV44555-5-c!fc!fUSSSLCE#wqy'!)G"3WQYv5EFFFF  bi!!!A#t<<<VBK''	BK!!!A#777	_y0
AAAaC&..""**,,"r{67;;;$s*Y6:M&1>5>!AAAaC&3I3I2NNN[T***4555b !!!mF#$$$ "	!AAAaC&>>>gk/"/1QQQqS67*;<<tLLL #r{6==#5#566\A-.uSS[[!!UAX-2244dAgnEEM+;5+;F+C+CBGGG2333mjj  !!!gaj
B<aSVo
.B<-U-f55
501113r'{BwK00 	= 	=CAaBF9Q<)A,677EVRVBuaU|O55566BE!Aub%(E"HVO<<<<#OQS)	k!d+++6777r 	N 	NAioob++hbh3x.@.@@CFB%-B''Cb"fruXRXcc"gg5F5FPSPSTVPWPWHXHX5X/YZZC[[]]FE#wrws1vAvs1va7LMMMM ) 5 5eo F F ) 5 5e6H I Ib			gb			g"+"7"78JSW"7"X"X	5%b			iyr!$$xrx0@0@0@'A'AADx}}f$_T622
 * 6 6u G Gg ]];;;{{{3HSMM	6k/"/1f+6677c  ""###
 RXAAABBFBH\\\""E	&%	-	-B= = =E 
E..d7
;
;
;<<<	E
 
 d
?
?
?@@@$$U$UCCCG6*GBKOA>>>LGAJNN$$%%%	E
 
 d
>
>
>???	E)]]5YGGGHHHM%B] s   A (A>=A>