
    M/Ph@                     `    d Z ddlZddlZddlmZ  G d d          Zd Z G d d          Z	dS )	a  
Mediation analysis

Implements algorithm 1 ('parametric inference') and algorithm 2
('nonparametric inference') from:

Imai, Keele, Tingley (2010).  A general approach to causal mediation
analysis. Psychological Methods 15:4, 309-334.

http://imai.princeton.edu/research/files/BaronKenny.pdf

The algorithms are described on page 317 of the paper.

In the case of linear models with no interactions involving the
mediator, the results should be similar or identical to the earlier
Barron-Kenny approach.
    N)maybe_name_or_idxc                   N    e Zd ZdZ	 	 	 ddZd Zd Zd Zd Zd Z	dd
Z
ddZdS )	MediationaK  
    Conduct a mediation analysis.

    Parameters
    ----------
    outcome_model : statsmodels model
        Regression model for the outcome.  Predictor variables include
        the treatment/exposure, the mediator, and any other variables
        of interest.
    mediator_model : statsmodels model
        Regression model for the mediator variable.  Predictor
        variables include the treatment/exposure and any other
        variables of interest.
    exposure : str or (int, int) tuple
        The name or column position of the treatment/exposure
        variable.  If positions are given, the first integer is the
        column position of the exposure variable in the outcome model
        and the second integer is the position of the exposure variable
        in the mediator model.  If a string is given, it must be the name
        of the exposure variable in both regression models.
    mediator : {str, int}
        The name or column position of the mediator variable in the
        outcome regression model.  If None, infer the name from the
        mediator model formula (if present).
    moderators : dict
        Map from variable names or index positions to values of
        moderator variables that are held fixed when calculating
        mediation effects.  If the keys are index position they must
        be tuples `(i, j)` where `i` is the index in the outcome model
        and `j` is the index in the mediator model.  Otherwise the
        keys must be variable names.
    outcome_fit_kwargs : dict-like
        Keyword arguments to use when fitting the outcome model.
    mediator_fit_kwargs : dict-like
        Keyword arguments to use when fitting the mediator model.
    outcome_predict_kwargs : dict-like
        Keyword arguments to use when calling predict on the outcome
        model.

    Returns a ``MediationResults`` object.

    Notes
    -----
    The mediator model class must implement ``get_distribution``.

    Examples
    --------
    A basic mediation analysis using formulas:

    >>> import statsmodels.api as sm
    >>> import statsmodels.genmod.families.links as links
    >>> probit = links.probit
    >>> outcome_model = sm.GLM.from_formula("cong_mesg ~ emo + treat + age + educ + gender + income",
    ...                                     data, family=sm.families.Binomial(link=Probit()))
    >>> mediator_model = sm.OLS.from_formula("emo ~ treat + age + educ + gender + income", data)
    >>> med = Mediation(outcome_model, mediator_model, "treat", "emo").fit()
    >>> med.summary()

    A basic mediation analysis without formulas.  This may be slightly
    faster than the approach using formulas.  If there are any
    interactions involving the treatment or mediator variables this
    approach will not work, you must use formulas.

    >>> import patsy
    >>> outcome = np.asarray(data["cong_mesg"])
    >>> outcome_exog = patsy.dmatrix("emo + treat + age + educ + gender + income", data,
    ...                              return_type='dataframe')
    >>> probit = sm.families.links.probit
    >>> outcome_model = sm.GLM(outcome, outcome_exog, family=sm.families.Binomial(link=Probit()))
    >>> mediator = np.asarray(data["emo"])
    >>> mediator_exog = patsy.dmatrix("treat + age + educ + gender + income", data,
    ...                               return_type='dataframe')
    >>> mediator_model = sm.OLS(mediator, mediator_exog)
    >>> tx_pos = [outcome_exog.columns.tolist().index("treat"),
    ...           mediator_exog.columns.tolist().index("treat")]
    >>> med_pos = outcome_exog.columns.tolist().index("emo")
    >>> med = Mediation(outcome_model, mediator_model, tx_pos, med_pos).fit()
    >>> med.summary()

    A moderated mediation analysis.  The mediation effect is computed
    for people of age 20.

    >>> fml = "cong_mesg ~ emo + treat*age + emo*age + educ + gender + income",
    >>> outcome_model = sm.GLM.from_formula(fml, data,
    ...                                      family=sm.families.Binomial())
    >>> mediator_model = sm.OLS.from_formula("emo ~ treat*age + educ + gender + income", data)
    >>> moderators = {"age" : 20}
    >>> med = Mediation(outcome_model, mediator_model, "treat", "emo",
    ...                 moderators=moderators).fit()

    References
    ----------
    Imai, Keele, Tingley (2010).  A general approach to causal mediation
    analysis. Psychological Methods 15:4, 309-334.
    http://imai.princeton.edu/research/files/BaronKenny.pdf

    Tingley, Yamamoto, Hirose, Keele, Imai (2014).  mediation : R
    package for causal mediation analysis.  Journal of Statistical
    Software 59:5.  http://www.jstatsoft.org/v59/i05/paper
    Nc	                    || _         || _        || _        ||ni | _        ||                     |d          | _        n|| _        |	 |ni | _        |	 |ni | _        ||ni | _        |j	        
                                | _        |j	        
                                | _        |                     dd          | _        |                     dd          | _        |                     dd          | _        d S )Nmediatorexposureoutcome)outcome_modelmediator_modelr   
moderators_guess_endog_namer   _outcome_fit_kwargs_mediator_fit_kwargs_outcome_predict_kwargsexogcopy_outcome_exog_mediator_exog_variable_pos_exp_pos_mediator_exp_pos_outcome_med_pos_outcome)	selfr
   r   r   r   r   outcome_fit_kwargsmediator_fit_kwargsoutcome_predict_kwargss	            [/var/www/html/test/jupyter/venv/lib/python3.11/site-packages/statsmodels/stats/mediation.py__init__zMediation.__init__}   s(    +, (2(>**B 22>:NNDMM$DM:L;$6$6!# 	 <O=%8%8!# 	! '2 )?(>8: 	$ +/4466,16688 "&!3!3J
!K!K !% 2 2:y I I !% 2 2:y I I    c                 :   |dk    r| j         }n| j        }|dk    rt          | j        |          d         S | j        }t          |          dk    ot          |t                     }|r|dk    r|d         S |dk    r|d         S d S t          ||          d         S )Nr         r	   r   )r   r
   r   r   r   len
isinstancestr)r   varmodelmodexpexp_is_2s         r   r   zMediation._variable_pos   s    J%CC$C*$T]C88;;mXX]@JsC,@,@(@ 	2	!!1v*$$1v %$ %S#..q11r   c                     t          |d          r2|j                            d          d                                         S t	          d|z            )Nformula~r   z$cannot infer %s name without formula)hasattrr,   splitstrip
ValueError)r   r'   typs      r   r   zMediation._guess_endog_name   sP    5)$$ 	K=&&s++A.44666CcIJJJr   c                 x    |j         }|                                }t          j                            ||          S )zN
        Simulate model parameters from fitted sampling distribution.
        )params
cov_paramsnprandommultivariate_normal)r   resultmncovs       r   _simulate_paramszMediation._simulate_params   s4     ]!!y,,R555r   c                    | j         }t          | j        d          s5||dd| j        f<   | j        D ]}| j        |         }||dd|d         f<   n| j        j        j                                        }||| j        <   | j        D ]}| j        |         }||j	        dd|f<   | j        j
        }| j                                        } |j        dd|i|}	|	j        }|S )z
        Return the mediator exog matrix with exposure set to the given
        value.  Set values of moderated variables as needed.
        r,   Nr!   data )r   r.   r   r   r   r>   framer   r   loc	__class___get_init_kwdsfrom_formular   )
r   r   mediator_exogixvdfvnameklassinit_kwargsr'   s
             r   _get_mediator_exogzMediation._get_mediator_exog   s   
 +t*I66 	'7?M!!!T334o , ,OB'*+aaaAh'',
 $)/4466B (Bt} % %OE*#$qqq%x  '1E-<<>>K&E&>>B>+>>E!JMr   c                    | j         }t          | j        d          sC||dd| j        f<   ||dd| j        f<   | j        D ]}| j        |         }||dd|d         f<   n| j        j        j                                        }||| j	        <   ||| j
        <   | j        D ]}| j        |         }|||<   | j        j        }| j                                        }	 |j        dd|i|	}
|
j        }|S )z
        Retun the exog design matrix with mediator and exposure set to
        the given values.  Set values of moderated variables as
        needed.
        r,   Nr   r>   r?   )r   r.   r
   r   r   r   r>   r@   r   r   r   rB   rC   rD   r   )r   r   r   outcome_exogrF   rG   rH   rI   rJ   rK   r'   s              r   _get_outcome_exogzMediation._get_outcome_exog   s-    )t)955 	&5=LD1125=LD112o + +OB')*QQQ1X&&+
 #(.3355B (Bt} (Bt}  OE*5		&0E,;;==K&E&>>B>+>>E :Lr   Fc                 $   |j         }|                                }|j        }|j        }|rOt          j                            dt          |          t          |                    }||         }||d d f         } |||fi |}	 |	j        di |S )Nr   r?   )	rB   rC   endogr   r6   r7   randintr#   fit)
r   r'   
fit_kwargsbootrJ   rK   rQ   r   iir
   s
             r   
_fit_modelzMediation._fit_model   s    **,,z 	""1c%jj#e**==B"IEAAA;DeT99[99 } ..:...r   
parametric  c                 n   |                     d          rA|                     | j        | j                  }|                     | j        | j                  }n$|                     d          st          d          g g g}g g g}t          |          D ]}|dk    r+|                     |          }|                     |          }	nR|                     | j        | j        d          }|j	        }|                     | j        | j        d          }|j	        }	ddgddgg}
dD ]}| 
                    |          }d	|i}t          |d
          r
|j        |d
<    | j        j        |	fi |}|                    |j        d                   }dD ]<}|                     ||          } | j        j        ||fi | j        }||
|         |<   =dD ]n}||                             |
d         |         |
d         |         z
             ||                             |
|         d         |
|         d         z
             odD ]F}t)          j        ||                   j        ||<   t)          j        ||                   j        ||<   G|| _        || _        t3          | j        | j                  }||_        |S )a  
        Fit a regression model to assess mediation.

        Parameters
        ----------
        method : str
            Either 'parametric' or 'bootstrap'.
        n_rep : int
            The number of simulation replications.

        Returns a MediationResults object.
        pararU   z1method must be either 'parametric' or 'bootstrap'rX   T)rU   Nr   r!   r   scaler   r!   )
startswithrW   r
   r   r   r   r1   ranger<   r4   rL   r.   r]   get_distributionrvsshaperO   predictr   appendr6   asarrayTindirect_effectsdirect_effectsMediationResultsmethod)r   rj   n_repoutcome_resultmediator_resultrg   rh   iteroutcome_paramsmediation_paramspredicted_outcomestmmexkwargsgenpotential_mediatorteoexpotrslts                        r   rS   zMediation.fit	  sO    V$$ 	!__T-?AYZZN"ood.A4C\]]OO""6** 	C   8b%LL %	^ %	^D%%!%!6!6~!F!F $(#8#8#I#I  !%1C151IPT "1 "V "V!/!6"&//$2E262KRV #2 #X #X#2#9 
 $(,t!= 4 4--b11 #?G44 <&5&;F7O:d):;K E E=CE E%(WWSYq\%:%:" 4 4B005GHHC3+3NC < <":< <B13&r*2..	4  ^ ^ #**+=a+@+CFXYZF[\]F^+^___q!(();A)>q)ADVWXDYZ[D\)\]]]]^  	@ 	@A"$*-=a-@"A"A"CQ "
>!+< = = ?N1 0, 5t7JKKr   )NNNNN)F)rX   rY   )__name__
__module____qualname____doc__r   r   r   r<   rL   rO   rW   rS   r?   r   r   r   r      s        c cJ JNOS(, J  J  J  JF2 2 2*K K K6 6 6  4  :
/ 
/ 
/ 
/J J J J J Jr   r   c                     dt          t          | dk              t          | dk                         z  t          t          |                     z  S )Nr"   r   )minsumfloatr#   )vecs    r   _pvaluer   V  s<    s3sQw<<S1W...s3xx@@r   c                        e Zd ZdZd ZddZdS )ri   z
    A class for holding the results of a mediation analysis.

    The following terms are used in the summary output:

    ACME : average causal mediated effect
    ADE : average direct effect
    c                 D   || _         || _        d d g}d d g}dD ]>}||                             d          ||<   ||                             d          ||<   ?|d         | _        |d         | _        |d         | _        |d         | _        | j        | j        z   | j        z   | j        z   dz  | _        | j        | j        z  | _        | j        | j        z  | _	        | j        | j	        z   dz  | _
        | j        | j        z   dz  | _        | j        | j        z   dz  | _        d S )Nr\   r   r!   r"   )rg   rh   mean	ACME_ctrlACME_txADE_ctrlADE_txtotal_effectprop_med_ctrlprop_med_txprop_med_avgACME_avgADE_avg)r   rg   rh   indirect_effects_avgdirect_effects_avgrz   s         r   r   zMediationResults.__init__d  s3    0, $d|"D\ 	> 	>A&6q&9&>&>q&A&A #$21$5$:$:1$=$=q!!-a0+A.*1-(+!^dl:T]JT[X\]]!^d.??<$*;;!/$2BBaG$,6!;3q8r   皙?c                    g d}g d}t          j        ||          }t          | j        | j        | j        | j        | j        | j        | j	        | j
        | j        | j        g
          D ]\  }}|| j        u s|| j	        u s	|| j        u rt          j        |          |j        |df<   n|                                |j        |df<   t          j        |d|z  dz            |j        |df<   t          j        |dd|dz  z
  z            |j        |df<   t%          |          |j        |df<   |                    t           j        d	
          }|S )z<
        Provide a summary of a mediation analysis.
        )EstimatezLower CI boundzUpper CI boundzP-value)
zACME (control)zACME (treated)zADE (control)zADE (treated)zTotal effectzProp. mediated (control)zProp. mediated (treated)zACME (average)zADE (average)zProp. mediated (average))columnsindexr   d   r"   r!      coerce)errors)pd	DataFrame	enumerater   r   r   r   r   r   r   r   r   r   r6   medianilocr   
percentiler   apply
to_numeric)r   alphar   r   smryir   s          r   summaryzMediationResults.summary|  se   
 NMM- - - |G5999!%!%!2D4F!%!14=!%t/@	!B C C 	+ 	+FAs ***t7G0G0GD---"$)C..	!Q$"%((**	!Q$ mCuqAADIadO mCEAI1FGGDIadO%cllDIadOOzz"-z99r   N)r   )r|   r}   r~   r   r   r   r?   r   r   ri   ri   Z  sA         9 9 90           r   ri   )
r   numpyr6   pandasr   statsmodels.graphics.utilsr   r   r   ri   r?   r   r   <module>r      s    "         8 8 8 8 8 8| | | | | | | |~	A A AB B B B B B B B B Br   