
    M/Phg                         d Z ddlZddlZddlmZ ddlmZ  G d d          Z	d Z
	 	 dd	Z	 	 ddZddZd ZddZdS )zM
Created on Thu Apr  2 14:34:25 2020

Author: Josef Perktold
License: BSD-3

    N)stats)HolderTuplec                   N    e Zd ZdZd Z	 	 ddZddZd ZddZdd	Z		 	 ddZ
dS )CombineResultszResults from combined estimate of means or effect sizes

    This currently includes intermediate results that might be removed
    c                 |   | j                             |           t          |                                          | _        | j        dz
  | _        t          j        | j	                  | _
        t          j        | j                  | _        | j        | j        dz
  z  | _        dd| j        z  z
  | _        i | _        d S )N   )__dict__updatelistkeys	_ini_keyskdf_residnpsqrtvar_hksj_fesd_eff_w_fe_hksjvar_hksj_resd_eff_w_re_hksjqh2i2cache_ci)selfkwdss     _/var/www/html/test/jupyter/venv/lib/python3.11/site-packages/statsmodels/stats/meta_analysis.py__init__zCombineResults.__init__   s    T"""diikk**
 !#(8 9 9 "(8 9 9 &DFQJ'a$'k/     皙?Nc                 4   ||f| j         v r| j         ||f         S || j        }||d|ini } |dd|i|}d| _        n|du r*t          j                            |dz            }d| _        nv|0|dz
  }t          j                            |dz  |          }d	| _        nDd
}	ddl}
|
                    |	           t          j                            |dz            }d| _        | j	        || j
        z  z
  }| j	        || j
        z  z   }||f}|| j         ||f<   |S )a   confidence intervals for the effect size estimate of samples

        Additional information needs to be provided for confidence intervals
        that are not based on normal distribution using available variance.
        This is likely to change in future.

        Parameters
        ----------
        alpha : float in (0, 1)
            Significance level for confidence interval. Nominal coverage is
            ``1 - alpha``.
        use_t : None or bool
            If use_t is None, then the attribute `use_t` determines whether
            normal or t-distribution is used for confidence intervals.
            Specifying use_t overrides the attribute.
            If use_t is false, then confidence intervals are based on the
            normal distribution. If it is true, then the t-distribution is
            used.
        nobs : None or float
            Number of observations used for degrees of freedom computation.
            Only used if use_t is true.
        ci_func : None or callable
            User provided function to compute confidence intervals.
            This is not used yet and will allow using non-standard confidence
            intervals.

        Returns
        -------
        ci_eff : tuple of ndarrays
            Tuple (ci_low, ci_upp) with confidence interval computed for each
            sample.

        Notes
        -----
        CombineResults currently only has information from the combine_effects
        function, which does not provide details about individual samples.
        Nuse_talphaci_funcF   normalr   tz`use_t=True` requires `nobs` for each sample or `ci_func`. Using normal distribution for confidence interval of individual samples.r    )r   r!   ci_sample_distrr   normisfr&   warningswarneffsd_eff)r   r"   r!   nobsr#   r   ci_effcritr   msgr+   ci_lowci_upps                r   conf_int_sampleszCombineResults.conf_int_samples'   s]   T 5>T]**=%00=JE','8GU##bDW1151D11F#,D  ~~z~~eai00'/$$##axH 7;;uqy(;;D+.D((HC $OOOMM#&&& :>>%!)44D+3D( Xt{ 22FXt{ 22Ff%F )/uen%r   c                    || j         }|du r#t          j                            |dz            }n(t          j                            |dz  | j                  }t          j        ddg          }| j        }| j	        }|||z  | j
        z  z   }|||z  | j        z  z   }|||z  t          j        | j                  z  z   }	|||z  t          j        | j                  z  z   }
|||	|
fS )a  confidence interval for the overall mean estimate

        Parameters
        ----------
        alpha : float in (0, 1)
            Significance level for confidence interval. Nominal coverage is
            ``1 - alpha``.
        use_t : None or bool
            If use_t is None, then the attribute `use_t` determines whether
            normal or t-distribution is used for confidence intervals.
            Specifying use_t overrides the attribute.
            If use_t is false, then confidence intervals are based on the
            normal distribution. If it is true, then the t-distribution is
            used.

        Returns
        -------
        ci_eff_fe : tuple of floats
            Confidence interval for mean effects size based on fixed effects
            model with scale=1.
        ci_eff_re : tuple of floats
            Confidence interval for mean effects size based on random effects
            model with scale=1
        ci_eff_fe_wls : tuple of floats
            Confidence interval for mean effects size based on fixed effects
            model with estimated scale corresponding to WLS, ie. HKSJ.
        ci_eff_re_wls : tuple of floats
            Confidence interval for mean effects size based on random effects
            model with estimated scale corresponding to WLS, ie. HKSJ.
            If random effects method is fully iterated, i.e. Paule-Mandel, then
            the estimated scale is 1.

        NFr$   r   )r!   r   r)   r*   r&   r   r   asarraymean_effect_femean_effect_resd_eff_w_fesd_eff_w_rer   r   r   )r   r"   r!   r1   sgnm_fem_re	ci_eff_fe	ci_eff_reci_eff_fe_wlsci_eff_re_wlss              r   conf_intzCombineResults.conf_intw   s    D =JEE>>:>>%!),,DD7;;uqy$-88Dj"a!!""3:(888	3:(888	sTzBGD4D,E,EEEsTzBGD4D,E,EEE)]MAAr   c                     t           j                            | j        | j        dz
            }t          | j        || j        dz
  d          }|S )a  Test whether the means of all samples are the same

        currently no options, test uses chisquare distribution
        default might change depending on `use_t`

        Returns
        -------
        res : HolderTuple instance
            The results include the following attributes:

            - statistic : float
                Test statistic, ``q`` in meta-analysis, this is the
                pearson_chi2 statistic for the fixed effects model.
            - pvalue : float
                P-value based on chisquare distribution.
            - df : float
                Degrees of freedom, equal to number of studies or samples
                minus 1.
        r   chi2)	statisticpvaluedfdistr)r   rF   sfr   r   r   )r   rH   ress      r   test_homogeneityzCombineResults.test_homogeneity   sO    ( tvtvz22DF!'!VaZ &( ( ( 
r   c                    |                      ||          \  }}t          j        | j        | j        ||| j        | j        g          }|                     ||          }| j        | j	        |d         d         |d         d         dt          j
        gg}| j        | j        |d         d         |d         d         t          j
        dgg}| j        | j        |d         d         |d         d         dt          j
        gg}	| j        | j        |d         d         |d         d         t          j
        dgg}
t          j        ||||	|
gd          }g d}||fS )a  Create array with sample statistics and mean estimates

        Parameters
        ----------
        alpha : float in (0, 1)
            Significance level for confidence interval. Nominal coverage is
            ``1 - alpha``.
        use_t : None or bool
            If use_t is None, then the attribute `use_t` determines whether
            normal or t-distribution is used for confidence intervals.
            Specifying use_t overrides the attribute.
            If use_t is false, then confidence intervals are based on the
            normal distribution. If it is true, then the t-distribution is
            used.

        Returns
        -------
        res : ndarray
            Array with columns
            ['eff', "sd_eff", "ci_low", "ci_upp", "w_fe","w_re"].
            Rows include statistics for samples and estimates of overall mean.
        column_names : list of str
            The names for the columns, used when creating summary DataFrame.
        r"   r!   r   r   r$      )axis)r-   r.   r3   r4   w_few_re)r5   r   column_stackr-   r.   weights_rel_feweights_rel_rerD   r9   r;   nanr:   r<   r   r   concatenate)r   r"   r!   r3   r4   rL   cires_feres_re
res_fe_wls
res_re_wlscolumn_namess               r   summary_arrayzCombineResults.summary_array   sn   4 ..U%.HHotx%v#2D4GI J J ]]e]44&(8a58RU1Xq"&2 3&(8a58RU1Xrvq2 3*D,A!uQxAq1bf6 7
*D,A!uQxAq2616 7
 nc66:zJ"#% % %LLLL  r   c                     || j         }t          | j                  g dz   }|                     ||          \  }}t	          j        |||          }|S )a|  Create DataFrame with sample statistics and mean estimates

        Parameters
        ----------
        alpha : float in (0, 1)
            Significance level for confidence interval. Nominal coverage is
            ``1 - alpha``.
        use_t : None or bool
            If use_t is None, then the attribute `use_t` determines whether
            normal or t-distribution is used for confidence intervals.
            Specifying use_t overrides the attribute.
            If use_t is false, then confidence intervals are based on the
            normal distribution. If it is true, then the t-distribution is
            used.

        Returns
        -------
        res : DataFrame
            pandas DataFrame instance with columns
            ['eff', "sd_eff", "ci_low", "ci_upp", "w_fe","w_re"].
            Rows include statistics for samples and estimates of overall mean.

        N)zfixed effectzrandom effectzfixed effect wlszrandom effect wlsrO   )indexcolumns)r!   r   	row_namesr_   pd	DataFrame)r   r"   r!   labelsrL   	col_namesresultss          r   summary_framezCombineResults.summary_frame   sl    0 =JEt~&&< < << ++%u+EEY,s&)DDDr   Fc                    ddl m} |                     ||          }|rt          j        |g d                   }t          j        |ddg         |dg         j        z
            } |d	|d         ||j        |j        d|}	|	S )
aU  Forest plot with means and confidence intervals

        Parameters
        ----------
        ax : None or matplotlib axis instance
            If ax is provided, then the plot will be added to it.
        alpha : float in (0, 1)
            Significance level for confidence interval. Nominal coverage is
            ``1 - alpha``.
        use_t : None or bool
            If use_t is None, then the attribute `use_t` determines whether
            normal or t-distribution is used for confidence intervals.
            Specifying use_t overrides the attribute.
            If use_t is false, then confidence intervals are based on the
            normal distribution. If it is true, then the t-distribution is
            used.
        use_exp : bool
            If `use_exp` is True, then the effect size and confidence limits
            will be exponentiated. This transform log-odds-ration into
            odds-ratio, and similarly for risk-ratio.
        ax : AxesSubplot, optional
            If given, this axes is used to plot in instead of a new figure
            being created.
        kwds : optional keyword arguments
            Keywords are forwarded to the dot_plot function that creates the
            plot.

        Returns
        -------
        fig : Matplotlib figure instance

        See Also
        --------
        dot_plot

        r   )dot_plotrO   )r-   r3   r4   r3   r4   r-   )points	intervalslines
line_orderr'   )statsmodels.graphics.dotplotsrk   ri   r   expabsvaluesra   )
r   r"   r!   use_expaxr   rk   res_dfhwfigs
             r   plot_forestzCombineResults.plot_forest  s    L 	;:::::##%u#== 	AVF#>#>#>?@@FVFHh/065'?3IIJJh LfUmr#\flL LFJL L
r   )r   NNN)r   N)r   NFN)__name__
__module____qualname____doc__r   r5   rD   rM   r_   ri   ry   r'   r   r   r   r      s         
  " =A!%N N N N`3B 3B 3B 3Bj  6,! ,! ,! ,!\   B ;@- - - - - -r   r   c                     |dz  |dz
  z  |dz  |dz
  z  z   ||z   dz
  z  }t          j        |          }||z   }ddd|z  dz
  z  z
  }	| |z
  |z  }
|	|
z  }||z  |z  |dz  dz  |dz
  z  z   }||fS )a  effect sizes for mean difference for use in meta-analysis

    mean1, sd1, nobs1 are for treatment
    mean2, sd2, nobs2 are for control

    Effect sizes are computed for the mean difference ``mean1 - mean2``
    standardized by an estimate of the within variance.

    This does not have option yet.
    It uses standardized mean difference with bias correction as effect size.

    This currently does not use np.asarray, all computations are possible in
    pandas.

    Parameters
    ----------
    mean1 : array
        mean of second sample, treatment groups
    sd1 : array
        standard deviation of residuals in treatment groups, within
    nobs1 : array
        number of observations in treatment groups
    mean2, sd2, nobs2 : arrays
        mean, standard deviation and number of observations of control groups

    Returns
    -------
    smd_bc : array
        bias corrected estimate of standardized mean difference
    var_smdbc : array
        estimate of variance of smd_bc

    Notes
    -----
    Status: API will still change. This is currently intended for support of
    meta-analysis.

    References
    ----------
    Borenstein, Michael. 2009. Introduction to Meta-Analysis.
        Chichester: Wiley.

    Chen, Ding-Geng, and Karl E. Peace. 2013. Applied Meta-Analysis with R.
        Chapman & Hall/CRC Biostatistics Series.
        Boca Raton: CRC Press/Taylor & Francis Group.

    r$   r   rP      	   gQ@)r   r   )mean1sd1nobs1mean2sd2nobs2var_diffsd_diffr/   bias_correctionsmdsmd_bc	var_smdbcs                r   effectsize_smdr   F  s    l Q%!)$Q%!)$%).):<HghG5=D!q4x!|,,O5=G
#Cs"Fuu$vqy1}t'DDI9r   diffc                 z   |dx}}nO|dk    r||z   }	||	z  }||	z  }n9|dk    r|                     dd          }
dx}}n|r|x}}nd}t          |          | dk    | |k    z  }|dk    ||k    z  }t          j        ||          }|||z   |z  z   }|||z   |z  z   }| |z   |z  }||z   |z  }|dk    r$t          j        |g|
R  }t          j        |g|
R  }|dv r"||z
  }|d	|z
  z  |z  |d	|z
  z  |z  z   }|}|}n=|d
v rEt          j        |          t          j        |          z
  }d	|z
  |z  |z  d	|z
  |z  |z  z   }|}|}n|dv r{t          j        |          t          j        d	|z
            z
  t          j        |          z
  t          j        d	|z
            z   }d	|d	|z
  z  |z  z  d	|d	|z
  z  |z  z  z   }|}|}nu|dv r`t          j        t          j        |                    t          j        t          j        |                    z
  }d	|z  d	|z  z   dz  }|}|}nd}t          |          ||fS )a  Effects sizes for two sample binomial proportions

    Parameters
    ----------
    count1, nobs1, count2, nobs2 : array_like
        data for two samples
    statistic : {"diff", "odds-ratio", "risk-ratio", "arcsine"}
        statistic for the comparison of two proportions
        Effect sizes for "odds-ratio" and "risk-ratio" are in logarithm.
    zero_correction : {None, float, "tac", "clip"}
        Some statistics are not finite when zero counts are in the data.
        The options to remove zeros are:

        * float : if zero_correction is a single float, then it will be added
          to all count (cells) if the sample has any zeros.
        * "tac" : treatment arm continuity correction see Ruecker et al 2009,
          section 3.2
        * "clip" : clip proportions without adding a value to all cells
          The clip bounds can be set with zero_kwds["clip_bounds"]

    zero_kwds : dict
        additional options to handle zero counts
        "clip_bounds" tuple, default (1e-6, 1 - 1e-6) if zero_correction="clip"
        other options not yet implemented

    Returns
    -------
    effect size : array
        Effect size for each sample.
    var_es : array
        Estimate of variance of the effect size

    Notes
    -----
    Status: API is experimental, Options for zero handling is incomplete.

    The names for ``statistics`` keyword can be shortened to "rd", "rr", "or"
    and "as".

    The statistics are defined as:

     - risk difference = p1 - p2
     - log risk ratio = log(p1 / p2)
     - log odds_ratio = log(p1 / (1 - p1) * (1 - p2) / p2)
     - arcsine-sqrt = arcsin(sqrt(p1)) - arcsin(sqrt(p2))

    where p1 and p2 are the estimated proportions in sample 1 (treatment) and
    sample 2 (control).

    log-odds-ratio and log-risk-ratio can be transformed back to ``or`` and
    `rr` using `exp` function.

    See Also
    --------
    statsmodels.stats.contingency_tables
    Nr   tacclipclip_bounds)gư>g!?z+zero_correction not recognized or supported)r   rdr   )z
risk-ratiorr)z
odds-ratioor)arcsinearcsinasr   z;statistic not recognized, use one of "rd", "rr", "or", "as")getNotImplementedErrorr   
logical_orr   logr   r   )count1r   count2r   rG   zero_correction	zero_kwdscc1cc2nobs_tr   r2   
zero_mask1
zero_mask2zmaskn1n2p1p2r   rd_varr-   var_efflog_rr
log_rr_varlog_or
log_or_varas_as_vars                                r   effectsize_2proportionsr     s   t cc	E	!	!fnfn	F	"	"mmM3CDDcc	 '##cc;!#&&&A+&E/2JA+&E/2JM*j11E	#)u$	$B	#)u$	$B
3,2	B
3,2	B&  WR&+&&&WR&+&&&N"""Wq2v#bAFmb&88	*	*	*bfRjj("f]R'1r6R-"*<<
	*	*	*bfQVnn,rvbzz9BF1r6NNJ"B-",-R1r6]R5G0HH
	1	1	1i$$ry'='==b&1r6/Q&K!#&&&<r   iteratedFr   c                    t          |           }|t          t          |                    }t          j                            |dz            }| }	|}
t          j        |
          }d|
z  }|                    d          }||z  }||	z  }|                                }d|z  }t          j        |          }||	dz  z                      d          }|||	z                                  dz  |z  z  }|dz
  }|	                                dv rt          |	|
fi |\  }}nK|	                                dv r&||dz                                  |z  z
  }||z
  |z  }nt          d          d|
|z   z  }|                    d          }||                    d          z  }||	z  }|                                }d|z  }t          j        |          }||	|z
  dz  z                                  |z  }||	|z
  dz  z                                  |z  } ||	|z
  dz  z                                  |z  }!||	|z
  dz  z                                  |z  }"t          di t                      }#|#S )	aJ
  combining effect sizes for effect sizes using meta-analysis

    This currently does not use np.asarray, all computations are possible in
    pandas.

    Parameters
    ----------
    effect : array
        mean of effect size measure for all samples
    variance : array
        variance of mean or effect size measure for all samples
    method_re : {"iterated", "chi2"}
        method that is use to compute the between random effects variance
        "iterated" or "pm" uses Paule and Mandel method to iteratively
        estimate the random effects variance. Options for the iteration can
        be provided in the ``kwds``
        "chi2" or "dl" uses DerSimonian and Laird one-step estimator.
    row_names : list of strings (optional)
        names for samples or studies, will be included in results summary and
        table.
    alpha : float in (0, 1)
        significance level, default is 0.05, for the confidence intervals

    Returns
    -------
    results : CombineResults
        Contains estimation results and intermediate statistics, and includes
        a method to return a summary table.
        Statistics from intermediate calculations might be removed at a later
        time.

    Notes
    -----
    Status: Basic functionality is verified, mainly compared to R metafor
    package. However, API might still change.

    This computes both fixed effects and random effects estimates. The
    random effects results depend on the method to estimate the RE variance.

    Scale estimate
    In fixed effects models and in random effects models without fully
    iterated random effects variance, the model will in general not account
    for all residual variance. Traditional meta-analysis uses a fixed
    scale equal to 1, that might not produce test statistics and
    confidence intervals with the correct size. Estimating the scale to account
    for residual variance often improves the small sample properties of
    inference and confidence intervals.
    This adjustment to the standard errors is often referred to as HKSJ
    method based attributed to Hartung and Knapp and Sidik and Jonkman.
    However, this is equivalent to estimating the scale in WLS.
    The results instance includes both, fixed scale and estimated scale
    versions of standard errors and confidence intervals.

    References
    ----------
    Borenstein, Michael. 2009. Introduction to Meta-Analysis.
        Chichester: Wiley.

    Chen, Ding-Geng, and Karl E. Peace. 2013. Applied Meta-Analysis with R.
        Chapman & Hall/CRC Biostatistics Series.
        Boca Raton: CRC Press/Taylor & Francis Group.

    Nr$   r   r   )r   pm)rF   dlz(method_re should be "iterated" or "chi2"r'   )lenr   ranger   r)   r*   r   r   sumlower_fit_tau_iterative
ValueErrorr   locals)$effectvariance	method_rerc   r!   r"   r   r   r1   r-   r   r.   
weights_fe
w_total_ferU   eff_w_fer9   var_eff_w_fer;   r   rI   tau2_c
weights_re
w_total_rerV   eff_w_rer:   var_eff_w_rer<   scale_hksj_rescale_hksj_fer   r   rL   s$                                       r   combine_effectsr     s   D 	FAqNN	:>>%!)$$D CGWWF WJ""J*,N#H\\^^Nz>L',''K 
c1f	!!!$$A*s
			!	!1	$z	11A	
QB...$S'::T::aa			n	,	,*a-,,..;;B!|CDDDgn%J""J*.."3"33N#H\\^^Nz>L',''K  3#7!";;@@BBRGM3#7!";;@@BBRGM!S>%9A$==BBDDrIK!S>%9A$==BBDDrIK

$
$688
$
$CJr   h㈵>2   c                    |}| j         d         }d}t          |          D ]}d||z   z  }	|	                    |           |	                    d          z  }
| |
z
  dz  }|	                    |          }||dz
  z
  }|dk     rd}d} n<t	          j        |d|          rd} n!||	dz                      |          z  }||z  }||fS )a  Paule-Mandel iterative estimate of between random effect variance

    implementation follows DerSimonian and Kacker 2007 Appendix 8
    see also Kacker 2004

    Parameters
    ----------
    eff : ndarray
        effect sizes
    var_eff : ndarray
        variance of effect sizes
    tau2_start : float
        starting value for iteration
    atol : float, default: 1e-5
        convergence tolerance for absolute value of estimating equation
    maxiter : int
        maximum number of iterations

    Returns
    -------
    tau2 : float
        estimate of random effects variance tau squared
    converged : bool
        True if iteration has converged.

    r   Fr   r$   atolT)shaper   dotr   r   allclose)r-   r   
tau2_startr   maxiterr   r   	convergediwmresid_sqq_weedeltas                  r   r   r   t  s    6 D	!AI7^^  4 EE#JJq!!Ga<eeHooAE]66DIE;r14((( 	IEadZZ)))?r   c                 n   |}|                     |           |                    d          z  }| |z
  dz  }|                     |          }|                                }|                     |          |dz                       |          |z  z
  }||dz                                  |z  z
  }	||z
  |	z  }
|
S )a  one-step method of moment estimate of between random effect variance

    implementation follows Kacker 2004 and DerSimonian and Kacker 2007 eq. 6

    Parameters
    ----------
    eff : ndarray
        effect sizes
    var_eff : ndarray
        variance of effect sizes
    weights : ndarray
        weights for estimating overall weighted mean

    Returns
    -------
    tau2 : float
        estimate of random effects variance tau squared

    r   r$   )r   r   )r-   r   weightsr   r   r   r   w_texpectdenomr   s              r   _fit_tau_mmr     s    ( 	A	c

QUU1XXAa!|H
%%//C
%%''CUU7^^q!tjj11C77F1a4**,,$$E&LE!DKr   c                     |}d}t          |          D ]M}d||z   z  }t          | ||          }	t          d|	          }	|	|z
  }
t          j        |
d|          rd} n|	}N||fS )a  iterated method of moment estimate of between random effect variance

    This repeatedly estimates tau, updating weights in each iteration
    see two-step estimators in DerSimonian and Kacker 2007

    Parameters
    ----------
    eff : ndarray
        effect sizes
    var_eff : ndarray
        variance of effect sizes
    tau2_start : float
        starting value for iteration
    atol : float, default: 1e-5
        convergence tolerance for change in tau2 estimate between iterations
    maxiter : int
        maximum number of iterations

    Returns
    -------
    tau2 : float
        estimate of random effects variance tau squared
    converged : bool
        True if iteration has converged.

    Fr   r   r   T)r   r   maxr   r   )r-   r   r   r   r   r   r   r   r   tau2_newr   s              r   _fit_tau_iter_mmr     s    6 DI7^^  4 sGQ//q(##4;uad+++ 	IE?r   )r   NN)r   NFr   )r   r   r   )r}   numpyr   pandasrd   scipyr   statsmodels.stats.baser   r   r   r   r   r   r   r   r'   r   r   <module>r      s                  . . . . . .s s s s s s s sl	> > >B EK<@q q q qh GK'+v v v vr0 0 0 0f  D* * * * * *r   