
    M/Ph=                         d Z ddlZddlZddlZddlZddlmZ ddl	m
Z
 dZd Zd Zd Zd	 Z e
e          	 	 	 	 dd            Zd ZdS )ae  
A predict-like function that constructs means and pointwise or
simultaneous confidence bands for the function f(x) = E[Y | X*=x,
X1=x1, ...], where X* is the focus variable and X1, X2, ... are
non-focus variables.  This is especially useful when conducting a
functional regression in which the role of x is modeled with b-splines
or other basis functions.
    N)ValueWarning)Appendera  
    Predictions and contrasts of a fitted model as a function of a given covariate.

    The value of the focus variable varies along a sequence of its
    quantiles, calculated from the data used to fit the model.  The
    other variables are held constant either at given values, or at
    values obtained by applying given summary functions to the data
    used to fit the model.  Optionally, a second specification of the
    non-focus variables is provided and the contrast between the two
    specifications is returned.

    Parameters
    ----------
    result : statsmodels result object
        A results object for the fitted model.
    focus_var : str
        The name of the 'focus variable'.
    summaries : dict-like
        A map from names of non-focus variables to summary functions.
        Each summary function is applied to the data used to fit the
        model, to obtain a value at which the variable is held fixed.
    values : dict-like
        Values at which a given non-focus variable is held fixed.
    summaries2 : dict-like
        A second set of summary functions used to define a contrast.
    values2 : dict-like
        A second set of fixed values used to define a contrast.
    alpha : float
        `1 - alpha` is the coverage probability.
    ci_method : str
        The method for constructing the confidence band, one of
        'pointwise', 'scheffe', and 'simultaneous'.
    num_points : int
        The number of equally-spaced quantile points where the
        prediction is made.
    exog : array_like
        Explicitly provide points to cover with the confidence band.
    exog2 : array_like
        Explicitly provide points to contrast to `exog` in a functional
        confidence band.
    kwargs :
        Arguments passed to the `predict` method.

    Returns
    -------
    pred : array_like
        The predicted mean values.
    cb : array_like
        An array with two columns, containing respectively the lower
        and upper limits of a confidence band.
    fvals : array_like
        The values of the focus variable at which the prediction is
        made.

    Notes
    -----
    All variables in the model except for the focus variable should be
    included as a key in either `summaries` or `values` (unless `exog`
    is provided).

    If `summaries2` and `values2` are not provided, the returned value
    contains predicted conditional means for the outcome as the focus
    variable varies, with the other variables fixed as specified.

    If `summaries2` and/or `values2` is provided, two sets of
    predicted conditional means are calculated, and the returned value
    is the contrast between them.

    If `exog` is provided, then the rows should contain a sequence of
    values approximating a continuous path through the domain of the
    covariates.  For example, if Z(s) is the covariate expressed as a
    function of s, then the rows of exog may approximate Z(g(s)) for
    some continuous function g.  If `exog` is provided then neither of
    the summaries or values arguments should be provided.  If `exog2`
    is also provided, then the returned value is a contrast between
    the functionas defined by `exog` and `exog2`.

    Examples
    --------
    Fit a model using a formula in which the predictors are age
    (modeled with splines), ethnicity (which is categorical), gender,
    and income.  Then we obtain the fitted mean values as a function
    of age for females with mean income and the most common
    ethnicity.

    >>> model = sm.OLS.from_formula('y ~ bs(age, df=4) + C(ethnicity) + gender + income', data)
    >>> result = model.fit()
    >>> mode = lambda x : x.value_counts().argmax()
    >>> summaries = {'income': np.mean, ethnicity=mode}
    >>> values = {'gender': 'female'}
    >>> pr, cb, x = predict_functional(result, 'age', summaries, values)

    Fit a model using arrays.  Plot the means as a function of x3,
    holding x1 fixed at its mean value in the data used to fit the
    model, and holding x2 fixed at 1.

    >>> model = sm.OLS(y ,x)
    >>> result = model.fit()
    >>> summaries = {'x1': np.mean}
    >>> values = {'x2': 1}
    >>> pr, cb, x = predict_functional(result, 'x3', summaries, values)

    Fit a model usng a formula and construct a contrast comparing the
    female and male predicted mean functions.

    >>> model = sm.OLS.from_formula('y ~ bs(age, df=4) + gender', data)
    >>> result = model.fit()
    >>> values = {'gender': 'female'}
    >>> values2 = {'gender': 'male'}
    >>> pr, cb, x = predict_functional(result, 'age', values=values, values2=values2)
    c                    | j         }|j        j        |i }|i }|         j        t	          j        d          u rt          d          t          |                                          t          |                                          z   |gz   }fd|D             }t          j	        
                                          |j        hz
  }|t          |          z
  }	t          |	          }	t          |	          dk    r:t          j        dd                    d |	D                       z  t                      t#          |          }
t%          j        |
|	          }t)          ||          D ]\  }}t%          j        |
|
          ||<   t	          j        dd|          
                                }t	          j        |         |          }t	          j        |          }||j        dd|f<   |                                D ].} ||         j        dd|f                   |j        dd|f<   /|                                D ]}||         ||<   t5          j        |j        j        |d          }|||fS )a  
    Create dataframes for exploring a fitted model as a function of one variable.

    This works for models fit with a formula.

    Returns
    -------
    dexog : data frame
        A data frame in which the focus variable varies and the other variables
        are fixed at specified or computed values.
    fexog : data frame
        The data frame `dexog` processed through the model formula.
    NOz'focus variable may not have object typec                 *    g | ]}|         j         S  )dtype).0xexogs     f/var/www/html/test/jupyter/venv/lib/python3.11/site-packages/statsmodels/sandbox/predict_functional.py
<listcomp>z+_make_exog_from_formula.<locals>.<listcomp>   s    ...d1gm...    r   z0%s in data frame but not in summaries or values., c                     g | ]}d |z  S z'%s'r   r
   r   s     r   r   z+_make_exog_from_formula.<locals>.<listcomp>       "A"A"A!6A:"A"A"Ar   )indexcolumns)r   r	   d   	dataframereturn_type)modeldataframer	   np
ValueErrorlistkeyssetr   tolistendog_nameslenwarningswarnjoinr   rangepd	DataFramezipSerieslinspace
percentileasarraylocpatsydmatrixdesign_info)result	focus_var	summariesvalues
num_pointsr   colnamesdtypesvarl	unmatchedixfexogdr   pctlsfvalskydexogr   s                     @r   _make_exog_from_formularE      s    LE:D	~I--BCCCINN$$%%V[[]](;(;;ykIH....X...F t|""$$%%):(;;Ds8}}$IYI
9~~H		"A"Ay"A"A"ABBC"	$ 	$ 	$
 
z		BLr8444EFH%% 0 0192Q///a K3
++2244EM$y/511EJuE#EIaaal nn : :(9R=!!!R%99	!!!R% kkmm  2Jb		M%*0%&13 3 3E%r   c                    | j         }|j        }|j        }|i }|i }t          j        ||j        d         f          }t          |                                          t          |                                          z   |gz   }	t          |          t          |	          z
  }
t          |
          }
t          |
          dk    r:t          j        dd                    d |
D                       z  t                     t          j        dd|                                          }|                    |          }t          j        |dd|f         |          }||dd|f<   |                                D ]9}|                    |          } ||         |dd|f                   |dd|f<   :|                                D ]&}|                    |          }||         |dd|f<   '||fS )a;  
    Create dataframes for exploring a fitted model as a function of one variable.

    This works for models fit without a formula.

    Returns
    -------
    exog : data frame
        A data frame in which the focus variable varies and the other variables
        are fixed at specified or computed values.
    N   r   z/%s in model but not in `summaries` or `values`.r   c                     g | ]}d |z  S r   r   r   s     r   r   z*_make_exog_from_arrays.<locals>.<listcomp>   r   r   r   )r   r   
exog_namesr   zerosshaper    r!   r"   r%   r&   r'   r(   r   r.   r#   r   r/   )r5   r6   r7   r8   r9   r   
model_exogrI   r   r:   r=   rA   r>   rB   rC   s                  r   _make_exog_from_arraysrM      s    LEJ!J	~8Z!1!!4566D FKKMM""T)..*:*:%;%;;ykIHJ#h--/IYI
9~~G		"A"Ay"A"A"ABBC"	$ 	$ 	$
 K3
++2244E			)	$	$BM*QQQU+U33EDBK nn 7 7b!!#imJqqq"u$566QQQU kkmm ! !b!!RjQQQU;r   c                     t          | j        j        d          rt          | ||||          \  }}}nt	          | ||||          \  }}||}}|||fS )Nr   )hasattrr   r   rE   rM   )	r5   r6   r7   r8   r9   rD   r?   rB   r   s	            r   
_make_exogrP      sv     v| '** "5fi'0&*F Fuee -VY	!'5 5eTu%r   c                 ^   | i } |i }|i }|i }|| f||ffD ]\  }}t          |                                          t          |                                          z  }t          |          }t          |          dk    r%t	          dd                    |          z             | |||fS )Nr   zJOne or more variable names are contained in both `summaries` and `values`:r   )r"   r!   r    r%   r   r(   )r8   r7   values2
summaries2svrC   s          r   _check_argsrV   	  s    ~	
V$z7&;; , ,1]]S]]*"XXr77Q;;i!YYr]]+ , , ,  9gz11r   皙?	pointwiseT
   c                 >   |dvrt          d          |d up|d u}|r|st          d          | j        }|
t          d ||||fD                       rt          d          |
}t          j        |j        j        |d          }|
|         }|%|
}t          j        |j        j        |d          }|}nht          ||||          \  }}}}t          | ||||	          \  }}}t          |          t          |          z   dk    rt          | ||||	          \  }}}dd	l
m} dd
lm} t          | j        ||f          r,|                                }|                    ddi           n|} | j        dd|i|}|r | j        dd|i|}||z
  }||z
  }|dk    r-|                     |          }|                    |          }n|dk    r|                     |          }|j        }t+          j        |	df          }ddlm} | j        j        j        d         }| j        j        j        d         |z
  }|                    d|z
  ||          } |t+          j        || z            z  }!||!z
  |d d df<   ||!z   |d d df<   nY|dk    rSt;          | ||          \  }"}#t+          j        |j        d         df          }||#|"z  z
  |d d df<   ||#|"z  z   |d d df<   |s6| j        j        }$|$                     |          }|$                     |          }|||fS )N)rX   scheffesimultaneouszQconfidence band method must be one of `pointwise`, `scheffe`, and `simultaneous`.z-`linear` must be True for computing contrastsc              3      K   | ]}|d uV  	d S )Nr   r   s     r   	<genexpr>z%predict_functional.<locals>.<genexpr>0  s&      OOq}OOOOOOr   zAif `exog` is provided then do not provide `summaries` or `values`r   r   r   )GLM)GEEwhichlinearr   rX   )alphar[      )frG   r\   r   )!r   r   anyr2   r3   r   r4   rV   rP   r%   +statsmodels.genmod.generalized_linear_modelr_   3statsmodels.genmod.generalized_estimating_equationsr`   
isinstancecopyupdatepredictt_testconf_intsdr   rJ   scipy.stats.distributionsre   r   rK   cdfsqrt_glm_basic_scrfamilylinkinverse)%r5   r6   r7   r8   rS   rR   rc   	ci_methodrb   r9   r   exog2kwargscontrastr   r?   rD   rB   fexog2dexog2fvals2r_   r`   kwargs_predpredpred2rm   cbro   fdistdf1df2qffxsigmacru   s%                                        r   predict_functionalr     s    @@@ G H H 	H t#@4)?H J JHIIILEOOy*fg&NOOOOO 	@ ? @ @ @ ej4#> > >YF]5:#9#){D D DFF 2=V=F=D=G2I 2I.	7J
 )I)/= =ue z??S\\)A--%/	:07&E &E"FFF @?????GGGGGG&,c
++ kkmmGX.////6>44u444D ::F:k::e|Ku%%__5_))	i		u%%YXz1o&& 	988888l%a(l%a(3.YYq5y#s++"'#(###"9111a4"9111a4	n	$	$!&%77qXu{1~q)**!E'>111a4!E'>111a4 }!||D!!\\"U?r   c                 f   | j         }|j        j        d         }|                                 }t          j                            |          }||z  }t          j                            |          j        }t	          j	        ||          |z  
                    d          }	t	          j        t	          j        |	                    }
t          j                            |j        |j                  j        }|t	          j        |          z  }||
dddf         z  }t	          j        |dd          }|dz  
                    d          }t	          j        |          
                                ddlm fd}ddlm}  ||dd	d
          \  }}|j        st)          d          |
|fS )a  
    The basic SCR from (Sun et al. Annals of Statistics 2000).

    Computes simultaneous confidence regions (SCR).

    Parameters
    ----------
    result : results instance
        The fitted GLM results instance
    exog : array_like
        The exog values spanning the interval
    alpha : float
        `1 - alpha` is the coverage probability.

    Returns
    -------
    An array with two columns, containing the lower and upper
    confidence bounds, respectively.

    Notes
    -----
    The rows of `exog` should be a sequence of covariate values
    obtained by taking one 'free variable' x and varying it over an
    interval.  The matrix `exog` is thus the basis functions and any
    other covariates evaluated as x varies.
    r   rG   N)axisrd   )normc                     t          j        | dz   dz            z  t           j        z  dd                    |           z
  z  z   z
  S )Nrd   rG   )r   exppirq   )r   rc   kappa_0r   s    r   funcz_glm_basic_scr.<locals>.func  sD    Aa(2501a$((1++o3FFNNr   )brentqrY   T)full_outputzRoot finding error in basic SCR)r   r   rK   
cov_paramsr   linalginvcholeskyTdotsumr0   rr   solvediffrp   r   scipy.optimizer   	convergedr   )r5   r   rc   r   ncovhessABsigma2r   bzbzdbzdnr   r   r   rsltr   r   s     `               @@r   rs   rs     s   8 LE
A 



C9==D 	qA
	1A fT3$&++A..FJrwv''E 
df	%	%	'B"'!**B%4.B
'"aa
 
 
 CF<<??Dgdmm!!G......O O O O O O O &%%%%%fT1bd333GAt> <:;;;!8Or   )
NNNNrW   rX   TrY   NN)__doc__pandasr*   r2   numpyr   r&   statsmodels.tools.sm_exceptionsr   statsmodels.compat.pandasr   _predict_functional_docrE   rM   rP   rV   r   rs   r   r   r   <module>r      s              8 8 8 8 8 8 . . . . . .n d@ @ @F2 2 2j  2 2 2* 

!""AE<@FH(,] ] ] #"]@@ @ @ @ @r   