
    M/PhV                         d dl mZ d dlmZmZmZ d dlmZ d dlm	Z	 d dl
Z	 ddZddZddZd	 Zd
 Z	 	 ddZddZi fdZ G d d          Z G d de          ZdS )    )RegularizedResults)_calc_nodewise_row_calc_nodewise_weight_calc_approx_inv_cov)LikelihoodModelResults)OLSNc                 H    |t          d           | j        di |j        S )a  estimates the regularized fitted parameters.

    Parameters
    ----------
    mod : statsmodels model class instance
        The model for the current partition.
    pnum : scalar
        Index of current partition
    partitions : scalar
        Total number of partitions
    fit_kwds : dict-like or None
        Keyword arguments to be given to fit_regularized

    Returns
    -------
    An array of the parameters for the regularized fit
    NzD_est_regularized_naive currently requires that fit_kwds not be None. )
ValueErrorfit_regularizedparamsmodpnum
partitionsfit_kwdss       g/var/www/html/test/jupyter/venv/lib/python3.11/site-packages/statsmodels/base/distributed_estimation.py_est_regularized_naiver   K   sB    &  ? @ @ 	@ 3****11    c                 H    |t          d           | j        di |j        S )a  estimates the unregularized fitted parameters.

    Parameters
    ----------
    mod : statsmodels model class instance
        The model for the current partition.
    pnum : scalar
        Index of current partition
    partitions : scalar
        Total number of partitions
    fit_kwds : dict-like or None
        Keyword arguments to be given to fit

    Returns
    -------
    An array of the parameters for the fit
    NzF_est_unregularized_naive currently requires that fit_kwds not be None.r
   )r   fitr   r   s       r   _est_unregularized_naiver   e   sA    &  ? @ @ 	@ 37X%%r   c                     t          | d                   }t          |           }t          j        |          }| D ]}||z  }||z  }d|t          j        |          |k     <   |S )a   joins the results from each run of _est_<type>_naive
    and returns the mean estimate of the coefficients

    Parameters
    ----------
    params_l : list
        A list of arrays of coefficients.
    threshold : scalar
        The threshold at which the coefficients will be cut.
    r   )lennpzerosabs)params_l	thresholdpr   	params_mnr   s         r   _join_naiver"      sq     	HQKAXJI  V		I/0IbfY)+,r   c                 ^     | j         t          j        |          fi | }||d|z
  z  z  }|S )a  calculates the log-likelihood gradient for the debiasing

    Parameters
    ----------
    mod : statsmodels model class instance
        The model for the current partition.
    params : array_like
        The estimated coefficients for the current partition.
    alpha : scalar or array_like
        The penalty weight.  If a scalar, the same penalty weight
        applies to all variables in the model.  If a vector, it
        must have the same length as `params`, and contains a
        penalty weight for each coefficient.
    L1_wt : scalar
        The fraction of the penalty given to the L1 penalty term.
        Must be between 0 and 1 (inclusive).  If 0, the fit is
        a ridge fit, if 1 it is a lasso fit.
    score_kwds : dict-like or None
        Keyword arguments for the score function.

    Returns
    -------
    An array-like object of the same dimension as params

    Notes
    -----
    In general:

    gradient l_k(params)

    where k corresponds to the index of the partition

    For OLS:

    X^T(y - X^T params)
       )scorer   asarray)r   r   alphaL1_wt
score_kwdsgrads         r   
_calc_gradr+      sC    L CIbj((77J777DEQYDKr   c                     t          j         | j        t          j        |          fi |          }|dddf         | j        z  S )a  calculates the weighted design matrix necessary to generate
    the approximate inverse covariance matrix

    Parameters
    ----------
    mod : statsmodels model class instance
        The model for the current partition.
    params : array_like
        The estimated coefficients for the current partition.
    hess_kwds : dict-like or None
        Keyword arguments for the hessian function.

    Returns
    -------
    An array-like object, updated design matrix, same dimension
    as mod.exog
    N)r   sqrthessian_factorr&   exog)r   r   	hess_kwdsrhesss       r   _calc_wdesign_matr2      sL    & G&C&rz&'9'9GGYGGHHED>CH$$r   c                 J   |i n|}|i n|}|t          d          |d         }d|v r	|d         }nd}| j        j        \  }}	t          t	          j        d|	z  |z                      }
 | j        di |j        }t          | ||||          |z  }t          | ||          }g }g }t          ||
z  t          |dz   |
z  |	                    D ]O}t          |||          }|                    |           t          ||||          }|                    |           P||||fS )a  estimates the regularized fitted parameters, is the default
    estimation_method for class DistributedModel.

    Parameters
    ----------
    mod : statsmodels model class instance
        The model for the current partition.
    mnum : scalar
        Index of current partition.
    partitions : scalar
        Total number of partitions.
    fit_kwds : dict-like or None
        Keyword arguments to be given to fit_regularized
    score_kwds : dict-like or None
        Keyword arguments for the score function.
    hess_kwds : dict-like or None
        Keyword arguments for the Hessian function.

    Returns
    -------
    A tuple of parameters for regularized fit
        An array-like object of the fitted parameters, params
        An array-like object for the gradient
        A list of array like objects for nodewise_row
        A list of array like objects for nodewise_weight
    NzG_est_regularized_debiased currently requires that fit_kwds not be None.r'   r(   r$   g      ?r
   )r   r/   shapeintr   ceilr   r   r+   r2   rangeminr   appendr   )r   mnumr   r   r)   r0   r'   r(   nobsr    p_partr   r*   wexognodewise_row_lnodewise_weight_lidxnodewise_rownodewise_weights                      r   _est_regularized_debiasedrC      sz   : ")zJ'YI ? @ @ 	@ !(!hnGD!"q&J.//00F S ,,8,,3Fc65%<<tCDc6955ENTF]CV(;Q$?$?@@ 2 2)%e<<l+++/|S057 7  11114):::r   c                 D   t          | d         d                   }t          |           }t          j        |          }t          j        |          }g }g }| D ]N}||d         z  }||d         z  }|                    |d                    |                    |d                    Ot          j        |          }t          j        |          }||z  }|d|z  z  }t          ||          }	||	                    |          z   }
d|
t          j        |
          |k     <   |
S )a  joins the results from each run of _est_regularized_debiased
    and returns the debiased estimate of the coefficients

    Parameters
    ----------
    results_l : list
        A list of tuples each one containing the params, grad,
        nodewise_row and nodewise_weight values for each partition.
    threshold : scalar
        The threshold at which the coefficients will be cut.
    r   r$         g      )r   r   r   extendarrayr   dotr   )	results_lr   r    r   r!   grad_mnr>   r?   rapprox_inv_covdebiased_paramss              r   _join_debiasedrO     s&    	IaLOAYJIhqkkGN ' 'QqT	1Q4ad###  1&&&&Xn--N!233IsZG).:KLLN."4"4W"="==O;<OBF?++i78r   c                     | j                                         }|                    |            | j        ||fi |} | j        ||| j        fd|i| j        }|S )a  handles the model fitting for each machine. NOTE: this
    is primarily handled outside of DistributedModel because
    joblib cannot handle class methods.

    Parameters
    ----------
    self : DistributedModel class instance
        An instance of DistributedModel.
    pnum : scalar
        index of current partition.
    endog : array_like
        endogenous data for current partition.
    exog : array_like
        exogenous data for current partition.
    fit_kwds : dict-like
        Keywords needed for the model fitting.
    init_kwds_e : dict-like
        Additional init_kwds to add for each partition.

    Returns
    -------
    estimation_method result.  For the default,
    _est_regularized_debiased, a tuple.
    r   )	init_kwdscopyupdatemodel_classestimation_methodr   estimation_kwds)	selfr   endogr/   r   init_kwds_etemp_init_kwdsmodelresultss	            r   _helper_fit_partitionr]   H  s    6 ^((**N+&&&DUD;;N;;E$d$UD$/ = =.6='+';= =G Nr   c                   B    e Zd ZdZ	 	 	 	 ddZ	 	 d	dZ	 d
dZ	 d
dZdS )DistributedModela  
    Distributed model class

    Parameters
    ----------
    partitions : scalar
        The number of partitions that the data will be split into.
    model_class : statsmodels model class
        The model class which will be used for estimation. If None
        this defaults to OLS.
    init_kwds : dict-like or None
        Keywords needed for initializing the model, in addition to
        endog and exog.
    init_kwds_generator : generator or None
        Additional keyword generator that produces model init_kwds
        that may vary based on data partition.  The current usecase
        is for WLS and GLS
    estimation_method : function or None
        The method that performs the estimation for each partition.
        If None this defaults to _est_regularized_debiased.
    estimation_kwds : dict-like or None
        Keywords to be passed to estimation_method.
    join_method : function or None
        The method used to recombine the results from each partition.
        If None this defaults to _join_debiased.
    join_kwds : dict-like or None
        Keywords to be passed to join_method.
    results_class : results class or None
        The class of results that should be returned.  If None this
        defaults to RegularizedResults.
    results_kwds : dict-like or None
        Keywords to be passed to results class.

    Attributes
    ----------
    partitions : scalar
        See Parameters.
    model_class : statsmodels model class
        See Parameters.
    init_kwds : dict-like
        See Parameters.
    init_kwds_generator : generator or None
        See Parameters.
    estimation_method : function
        See Parameters.
    estimation_kwds : dict-like
        See Parameters.
    join_method : function
        See Parameters.
    join_kwds : dict-like
        See Parameters.
    results_class : results class
        See Parameters.
    results_kwds : dict-like
        See Parameters.

    Notes
    -----

    Examples
    --------
    Nc
                 N   || _         |t          | _        n|| _        |i | _        n|| _        |t          | _        n|| _        |i | _        n|| _        |t          | _        n|| _        |i | _	        n|| _	        |t          | _        n|| _        |		i | _        d S |	| _        d S N)r   r   rT   rQ   rC   rU   rV   rO   join_method	join_kwdsr   results_classresults_kwds)
rW   r   rT   rQ   rU   rV   rb   rc   rd   re   s
             r   __init__zDistributedModel.__init__  s    
 %"D*DDNN&DN$%>D""%6D""#%D  #2D -D*DDNN&DN !3D!.D "D ,Dr   
sequentialc                 "   |i }|dk    r|                      |||          }n1|dk    r|                     ||||          }nt          d|z             | j        |fi | j        } | j        dgdgfi | j        } | j        ||fi | j        S )ae  Performs the distributed estimation using the corresponding
        DistributedModel

        Parameters
        ----------
        data_generator : generator
            A generator that produces a sequence of tuples where the first
            element in the tuple corresponds to an endog array and the
            element corresponds to an exog array.
        fit_kwds : dict-like or None
            Keywords needed for the model fitting.
        parallel_method : str
            type of distributed estimation to be used, currently
            "sequential", "joblib" and "dask" are supported.
        parallel_backend : None or joblib parallel_backend object
            used to allow support for more complicated backends,
            ex: dask.distributed
        init_kwds_generator : generator or None
            Additional keyword generator that produces model init_kwds
            that may vary based on data partition.  The current usecase
            is for WLS and GLS

        Returns
        -------
        join_method result.  For the default, _join_debiased, it returns a
        p length array.
        Nrg   joblibz.parallel_method: %s is currently not supportedr   )	fit_sequential
fit_joblibr   rb   rc   rT   rQ   rd   re   )	rW   data_generatorr   parallel_methodparallel_backendinit_kwds_generatorrJ   r   res_mods	            r   r   zDistributedModel.fit  s    < Hl**++NH,?A AII (((8(;= =II
 M./ 0 0 0 "!)>>t~>> #$"A3>>t~>>!t!'6GGT5FGGGr   c           	      8   g }|At          |          D ]0\  }\  }}t          | ||||          }|                    |           1nTt          t          ||                    }	|	D ]4\  }\  \  }}}
t          | |||||
          }|                    |           5|S )a*  Sequentially performs the distributed estimation using
        the corresponding DistributedModel

        Parameters
        ----------
        data_generator : generator
            A generator that produces a sequence of tuples where the first
            element in the tuple corresponds to an endog array and the
            element corresponds to an exog array.
        fit_kwds : dict-like
            Keywords needed for the model fitting.
        init_kwds_generator : generator or None
            Additional keyword generator that produces model init_kwds
            that may vary based on data partition.  The current usecase
            is for WLS and GLS

        Returns
        -------
        join_method result.  For the default, _join_debiased, it returns a
        p length array.
        )	enumerater]   r9   zip)rW   rl   r   ro   rJ   r   rX   r/   r\   tup_genrY   s              r   rj   zDistributedModel.fit_sequential  s    0 	&'0'@'@ * *#mud/dE408: :  ))))	*  N$7!9 !9 : :G 7> * *22}tk/dE408+G G  ))))r   c                 J   
 ddl m}  |t           j                  \  }
}|)|' |
 fdt	          |          D                       }n|C|A|5   |
 fdt	          |          D                       }ddd           n# 1 swxY w Y   n|9|7t	          t          ||                    }	 |
 fd|	D                       }nT|R|Pt	          t          ||                    }	|5   |
 fd|	D                       }ddd           n# 1 swxY w Y   |S )a  Performs the distributed estimation in parallel using joblib

        Parameters
        ----------
        data_generator : generator
            A generator that produces a sequence of tuples where the first
            element in the tuple corresponds to an endog array and the
            element corresponds to an exog array.
        fit_kwds : dict-like
            Keywords needed for the model fitting.
        parallel_backend : None or joblib parallel_backend object
            used to allow support for more complicated backends,
            ex: dask.distributed
        init_kwds_generator : generator or None
            Additional keyword generator that produces model init_kwds
            that may vary based on data partition.  The current usecase
            is for WLS and GLS

        Returns
        -------
        join_method result.  For the default, _join_debiased, it returns a
        p length array.
        r   )parallel_funcNc              3   B   K   | ]\  }\  }} |||          V  d S ra   r
   .0r   rX   r/   fr   rW   s       r   	<genexpr>z.DistributedModel.fit_joblib.<locals>.<genexpr>c  sQ       : : 3mud AdD%x@@ : : : : : :r   c              3   B   K   | ]\  }\  }} |||          V  d S ra   r
   rx   s       r   r{   z.DistributedModel.fit_joblib.<locals>.<genexpr>i  sQ        >  >$7D-5$ !"$eT8 D D  >  >  >  >  >  >r   c           	   3   J   K   | ]\  }\  \  }}} ||||          V  d S ra   r
   ry   r   rX   r/   rQ   rz   r   rW   s        r   r{   z.DistributedModel.fit_joblib.<locals>.<genexpr>o  sX       ( ( @&@}ti AdD%xKK ( ( ( ( ( (r   c           	   3   J   K   | ]\  }\  \  }}} ||||          V  d S ra   r
   r~   s        r   r{   z.DistributedModel.fit_joblib.<locals>.<genexpr>v  sX        ,  ,$DD*D=E4) !"$eT8Y O O  ,  ,  ,  ,  ,  ,r   )statsmodels.tools.parallelrv   r]   r   rr   rs   )rW   rl   r   rn   ro   rv   parn_jobsrJ   rt   rz   s   ` `       @r   rk   zDistributedModel.fit_joblibD  sp   4 	=<<<<<&'<doNNQ#(;(C : : : : : :(88: : : : :II ).A.I! > >C  >  >  >  >  >  >#,^#<#< >  >  > > >	> > > > > > > > > > > > > > >
 %*=*IN4G H HIIG ( ( ( ( ( (&( ( ( ( (II ).A.MN4G H HIIG! , ,C  ,  ,  ,  ,  ,  ,#* ,  ,  , , ,	, , , , , , , , , , , , , , ,
 s$   'BBB2DDD)NNNNNNNN)Nrg   NNra   )__name__
__module____qualname____doc__rf   r   rj   rk   r
   r   r   r_   r_   m  s        =G~ 0437CG26-- -- -- --^ BN7;7H 7H 7H 7Ht ,0- - - -` (,6 6 6 6 6 6r   r_   c                   (     e Zd ZdZ fdZd Z xZS )DistributedResultsaT  
    Class to contain model results

    Parameters
    ----------
    model : class instance
        Class instance for model used for distributed data,
        this particular instance uses fake data and is really
        only to allow use of methods like predict.
    params : ndarray
        Parameter estimates from the fit model.
    c                 L    t                                          ||           d S ra   )superrf   )rW   r[   r   	__class__s      r   rf   zDistributedResults.__init__  s#    '''''r   c                 :     | j         j        | j        |g|R i |S )a  Calls self.model.predict for the provided exog.  See
        Results.predict.

        Parameters
        ----------
        exog : array_like NOT optional
            The values for which we want to predict, unlike standard
            predict this is NOT optional since the data in self.model
            is fake.
        *args :
            Some models can take additional arguments. See the
            predict method of the model for the details.
        **kwargs :
            Some models can take additional keywords arguments. See the
            predict method of the model for the details.

        Returns
        -------
            prediction : ndarray, pandas.Series or pandas.DataFrame
            See self.model.predict
        )r[   predictr   )rW   r/   argskwargss       r   r   zDistributedResults.predict  s-    . "tz!$+tEdEEEfEEEr   )r   r   r   r   rf   r   __classcell__)r   s   @r   r   r   }  sX         ( ( ( ( (F F F F F F Fr   r   ra   )r   )NNN)statsmodels.base.elastic_netr   (statsmodels.stats.regularized_covariancer   r   r   statsmodels.base.modelr   #statsmodels.regression.linear_modelr   numpyr   r   r   r"   r+   r2   rC   rO   r]   r_   r   r
   r   r   <module>r      s   ; ; ; ; ; ;0 0 0 0 0 0 0 0 0 0 9 9 9 9 9 9 3 3 3 3 3 3    @F2 2 2 24& & & &4   2( ( (V% % %. ?C9=>; >; >; >;B* * * *\ ')" " " "JM M M M M M M M`(F (F (F (F (F/ (F (F (F (F (Fr   