
    0Ph-l                         d Z ddlmZ ddlmZmZ ddlZddlm	Z	 ddl
mZ ddlmZmZ dd	lmZ dd
lmZ ddlmZ ddlmZmZ ddlmZmZmZ  G d dee          Z G d dee          ZdS )z
Various bayesian regression
    )log)IntegralRealN)linalg)pinvh   )RegressorMixin_fit_context)_safe_indexing)Interval)fast_logdet)_check_sample_weightvalidate_data   )LinearModel_preprocess_data_rescale_datac                      e Zd ZU dZ eeddd          g eeddd          g eeddd          g eeddd          g eeddd          g eeddd          gd eeddd          gd eeddd          gdgdgdgd	gd
Zee	d<   ddddddddddddd
dZ
 ed          dd            ZddZd Zd ZdS )BayesianRidgea  Bayesian ridge regression.

    Fit a Bayesian ridge model. See the Notes section for details on this
    implementation and the optimization of the regularization parameters
    lambda (precision of the weights) and alpha (precision of the noise).

    Read more in the :ref:`User Guide <bayesian_regression>`.
    For an intuitive visualization of how the sinusoid is approximated by
    a polynomial using different pairs of initial values, see
    :ref:`sphx_glr_auto_examples_linear_model_plot_bayesian_ridge_curvefit.py`.

    Parameters
    ----------
    max_iter : int, default=300
        Maximum number of iterations over the complete dataset before
        stopping independently of any early stopping criterion.

        .. versionchanged:: 1.3

    tol : float, default=1e-3
        Stop the algorithm if w has converged.

    alpha_1 : float, default=1e-6
        Hyper-parameter : shape parameter for the Gamma distribution prior
        over the alpha parameter.

    alpha_2 : float, default=1e-6
        Hyper-parameter : inverse scale parameter (rate parameter) for the
        Gamma distribution prior over the alpha parameter.

    lambda_1 : float, default=1e-6
        Hyper-parameter : shape parameter for the Gamma distribution prior
        over the lambda parameter.

    lambda_2 : float, default=1e-6
        Hyper-parameter : inverse scale parameter (rate parameter) for the
        Gamma distribution prior over the lambda parameter.

    alpha_init : float, default=None
        Initial value for alpha (precision of the noise).
        If not set, alpha_init is 1/Var(y).

        .. versionadded:: 0.22

    lambda_init : float, default=None
        Initial value for lambda (precision of the weights).
        If not set, lambda_init is 1.

        .. versionadded:: 0.22

    compute_score : bool, default=False
        If True, compute the log marginal likelihood at each iteration of the
        optimization.

    fit_intercept : bool, default=True
        Whether to calculate the intercept for this model.
        The intercept is not treated as a probabilistic parameter
        and thus has no associated variance. If set
        to False, no intercept will be used in calculations
        (i.e. data is expected to be centered).

    copy_X : bool, default=True
        If True, X will be copied; else, it may be overwritten.

    verbose : bool, default=False
        Verbose mode when fitting the model.

    Attributes
    ----------
    coef_ : array-like of shape (n_features,)
        Coefficients of the regression model (mean of distribution)

    intercept_ : float
        Independent term in decision function. Set to 0.0 if
        `fit_intercept = False`.

    alpha_ : float
       Estimated precision of the noise.

    lambda_ : float
       Estimated precision of the weights.

    sigma_ : array-like of shape (n_features, n_features)
        Estimated variance-covariance matrix of the weights

    scores_ : array-like of shape (n_iter_+1,)
        If computed_score is True, value of the log marginal likelihood (to be
        maximized) at each iteration of the optimization. The array starts
        with the value of the log marginal likelihood obtained for the initial
        values of alpha and lambda and ends with the value obtained for the
        estimated alpha and lambda.

    n_iter_ : int
        The actual number of iterations to reach the stopping criterion.

    X_offset_ : ndarray of shape (n_features,)
        If `fit_intercept=True`, offset subtracted for centering data to a
        zero mean. Set to np.zeros(n_features) otherwise.

    X_scale_ : ndarray of shape (n_features,)
        Set to np.ones(n_features).

    n_features_in_ : int
        Number of features seen during :term:`fit`.

        .. versionadded:: 0.24

    feature_names_in_ : ndarray of shape (`n_features_in_`,)
        Names of features seen during :term:`fit`. Defined only when `X`
        has feature names that are all strings.

        .. versionadded:: 1.0

    See Also
    --------
    ARDRegression : Bayesian ARD regression.

    Notes
    -----
    There exist several strategies to perform Bayesian ridge regression. This
    implementation is based on the algorithm described in Appendix A of
    (Tipping, 2001) where updates of the regularization parameters are done as
    suggested in (MacKay, 1992). Note that according to A New
    View of Automatic Relevance Determination (Wipf and Nagarajan, 2008) these
    update rules do not guarantee that the marginal likelihood is increasing
    between two consecutive iterations of the optimization.

    References
    ----------
    D. J. C. MacKay, Bayesian Interpolation, Computation and Neural Systems,
    Vol. 4, No. 3, 1992.

    M. E. Tipping, Sparse Bayesian Learning and the Relevance Vector Machine,
    Journal of Machine Learning Research, Vol. 1, 2001.

    Examples
    --------
    >>> from sklearn import linear_model
    >>> clf = linear_model.BayesianRidge()
    >>> clf.fit([[0,0], [1, 1], [2, 2]], [0, 1, 2])
    BayesianRidge()
    >>> clf.predict([[1, 1]])
    array([1.])
    r   Nleftclosedr   neitherbooleanverbosemax_itertolalpha_1alpha_2lambda_1lambda_2
alpha_initlambda_initcompute_scorefit_interceptcopy_Xr   _parameter_constraints,  MbP?ư>FTc                    || _         || _        || _        || _        || _        || _        || _        || _        |	| _        |
| _	        || _
        || _        d S Nr   )selfr   r   r   r    r!   r"   r#   r$   r%   r&   r'   r   s                [/var/www/html/test/jupyter/venv/lib/python3.11/site-packages/sklearn/linear_model/_bayes.py__init__zBayesianRidge.__init__   s_      !  $&**    prefer_skip_nested_validationc                 |   t          | ||t          j        t          j        gdd          \  }}|j        }|t          |||          }t          ||| j        | j        |          \  }}}}}|t          |||          \  }}}|| _
        || _        |j        \  }	}
t          j        t          j                  j        }| j        }| j        }|dt          j        |          |z   z  }|d}t          j        ||          }t          j        ||          }| j        }| j        }| j        }| j        }| j        }t1                      | _        d}t          j        |j        |          }t9          j        |d          \  }}}|d	z  }t=          | j                  D ](}|                      |||	|
||||||
  
        \  }}| j!        r5| "                    |	|
|||||          }| j        #                    |           t          j$        ||z  |||z  z   z            }|d	|z  z   t          j$        |d	z            d	|z  z   z  }|	|z
  d	|z  z   |d	|z  z   z  }|d
k    rTt          j$        t          j%        ||z
                      | j&        k     r"|rtO          dtQ          |          d            nt          j)        |          }*|dz   | _*        || _+        || _,        |                      |||	|
||||||
  
        \  | _-        }| j!        rS| "                    |	|
|||||          }| j        #                    |           t          j.        | j                  | _        t          j        |j        ||||z  z   ddt          j/        f         z            }d|z  |z  | _0        | 1                    |||           | S )a7  Fit the model.

        Parameters
        ----------
        X : ndarray of shape (n_samples, n_features)
            Training data.
        y : ndarray of shape (n_samples,)
            Target values. Will be cast to X's dtype if necessary.

        sample_weight : ndarray of shape (n_samples,), default=None
            Individual weights for each sample.

            .. versionadded:: 0.20
               parameter *sample_weight* support to BayesianRidge.

        Returns
        -------
        self : object
            Returns the instance itself.
        T)dtypeforce_writeable	y_numericNr5   )r&   copysample_weight      ?F)full_matricesr   r   zConvergence after z iterationsr   )2r   npfloat64float32r5   r   r   r&   r'   r   	X_offset_X_scale_shapefinfoepsr#   r$   varasarrayr   r!   r"   r   r    listscores_dotTr   svdranger   _update_coef_r%   _log_marginal_likelihoodappendsumabsr   printstrr9   n_iter_alpha_lambda_coef_arraynewaxissigma__set_intercept)r.   Xyr:   r5   r@   	y_offset_rA   _	n_samples
n_featuresrD   rU   rV   r   r!   r"   r   r    	coef_old_XT_yUSVheigen_vals_iter_rW   rmse_sgamma_scaled_sigma_s                                  r/   fitzBayesianRidge.fit   s   , :rz* 
 
 
1 $0OOOM/?,'0
 0
 0
,1iH $#Aq-88GAq!"  !	: hrz""& ">BF1IIO,F?G F%000*WE222,==,,vv	vac1~~:au5551bd 4=)) 	' 	'E  --1iT1b+vw LE5 ! '11z;PU  ##A&&& VVk1g@T6TUVVFH,q1A1AAL1PQG&(1w;651w;;NOF zzbfRVI,=%>%>??$(JJ K.E

MJJJIIqy  ..q)Zq"k67
 

E  	2--:{FGUE A L"""8DL11DL D"g&662:FF
 
 V|}4Iy(;;;r1   c                     |                      |          }|s|S t          j        || j                  |z                      d          }t          j        |d| j        z  z             }||fS )  Predict using the linear model.

        In addition to the mean of the predictive distribution, also its
        standard deviation can be returned.

        Parameters
        ----------
        X : {array-like, sparse matrix} of shape (n_samples, n_features)
            Samples.

        return_std : bool, default=False
            Whether to return the standard deviation of posterior prediction.

        Returns
        -------
        y_mean : array-like of shape (n_samples,)
            Mean of predictive distribution of query points.

        y_std : array-like of shape (n_samples,)
            Standard deviation of predictive distribution of query points.
        r   axisr;   )_decision_functionr=   rI   rZ   rP   sqrtrU   )r.   r\   
return_stdy_meansigmas_squared_datay_stds         r/   predictzBayesianRidge.predict\  st    , ((++ 	!M#%6!T[#9#9A#="B"B"B"J"JG/33DEFFE5= r1   c                 z   ||k    rEt           j                            |j        |||
|	z  z   ddt           j        f         z  |g          }n@t           j                            |j        |||
|	z  z   dddf         z  |j        |g          }t          j        |t          j        ||          z
  dz            }||fS )zUpdate posterior mean and compute corresponding rmse.

        Posterior mean is given by coef_ = scaled_sigma_ * X.T * y where
        scaled_sigma_ = (lambda_/alpha_ * np.eye(n_features)
                         + np.dot(X.T, X))^-1
        Nr   )r=   r   	multi_dotrJ   rY   rP   rI   )r.   r\   r]   r`   ra   rc   rd   rf   rg   rU   rV   rW   ri   s                r/   rM   zBayesianRidge._update_coef_z  s     z!!I''r[7V+;;QQQ
]KKTR EE I''a;6)9947CCQS!L E BF1e,,,233e|r1   c                    | j         }| j        }	| j        }
| j        }||k    r.t	          j        t	          j        |||z  z                        }njt	          j        ||t	          j        |          j	                  }|d|xx         ||z  z  cc<   t	          j        t	          j        |                     }|
t          |          z  ||z  z
  }||t          |          z  |	|z  z
  z  }|d|t          |          z  |t          |          z  z   ||z  z
  |t	          j        |dz            z  z
  |z   |t          dt          j
        z            z  z
  z  z  }|S )zLog marginal likelihood.r8   N      ?r   )r   r    r!   r"   r=   rP   r   fullrX   r5   pi)r.   r`   ra   
eigen_valsrU   rV   coefrmser   r    r!   r"   logdet_sigmascores                 r/   rN   z&BayesianRidge._log_marginal_likelihood  sw    ,,==
 z!!F26'FZ4G*G#H#HIIILL7:wbhw>O>O>UVVVL)$$$(;;$$$F26,#7#7888L3w<<'(W*<<3v;;&6)999W%#f++%&tm tQw'( 	
 #a"%i..()
 	
 r1   r-   F)__name__
__module____qualname____doc__r   r   r   r(   dict__annotations__r0   r
   rm   rx   rM   rN    r1   r/   r   r      s        O Od Xh4???@q$y999:HT1d6:::;HT1d6:::;XdAtF;;;<XdAtF;;;<XXdAtFCCCDhhtQVDDDE##+;$ $D   $     : \555A A A 65AF! ! ! !<  .    r1   r   c                   d   e Zd ZU dZ eeddd          g eeddd          g eeddd          g eeddd          g eeddd          g eeddd          gdg eeddd          gdgdgdgd	Zee	d
<   dddddddddddd	dZ
 ed          d             Zd Zd ZddZdS )ARDRegressiona  Bayesian ARD regression.

    Fit the weights of a regression model, using an ARD prior. The weights of
    the regression model are assumed to be in Gaussian distributions.
    Also estimate the parameters lambda (precisions of the distributions of the
    weights) and alpha (precision of the distribution of the noise).
    The estimation is done by an iterative procedures (Evidence Maximization)

    Read more in the :ref:`User Guide <bayesian_regression>`.

    Parameters
    ----------
    max_iter : int, default=300
        Maximum number of iterations.

        .. versionchanged:: 1.3

    tol : float, default=1e-3
        Stop the algorithm if w has converged.

    alpha_1 : float, default=1e-6
        Hyper-parameter : shape parameter for the Gamma distribution prior
        over the alpha parameter.

    alpha_2 : float, default=1e-6
        Hyper-parameter : inverse scale parameter (rate parameter) for the
        Gamma distribution prior over the alpha parameter.

    lambda_1 : float, default=1e-6
        Hyper-parameter : shape parameter for the Gamma distribution prior
        over the lambda parameter.

    lambda_2 : float, default=1e-6
        Hyper-parameter : inverse scale parameter (rate parameter) for the
        Gamma distribution prior over the lambda parameter.

    compute_score : bool, default=False
        If True, compute the objective function at each step of the model.

    threshold_lambda : float, default=10 000
        Threshold for removing (pruning) weights with high precision from
        the computation.

    fit_intercept : bool, default=True
        Whether to calculate the intercept for this model. If set
        to false, no intercept will be used in calculations
        (i.e. data is expected to be centered).

    copy_X : bool, default=True
        If True, X will be copied; else, it may be overwritten.

    verbose : bool, default=False
        Verbose mode when fitting the model.

    Attributes
    ----------
    coef_ : array-like of shape (n_features,)
        Coefficients of the regression model (mean of distribution)

    alpha_ : float
       estimated precision of the noise.

    lambda_ : array-like of shape (n_features,)
       estimated precisions of the weights.

    sigma_ : array-like of shape (n_features, n_features)
        estimated variance-covariance matrix of the weights

    scores_ : float
        if computed, value of the objective function (to be maximized)

    n_iter_ : int
        The actual number of iterations to reach the stopping criterion.

        .. versionadded:: 1.3

    intercept_ : float
        Independent term in decision function. Set to 0.0 if
        ``fit_intercept = False``.

    X_offset_ : float
        If `fit_intercept=True`, offset subtracted for centering data to a
        zero mean. Set to np.zeros(n_features) otherwise.

    X_scale_ : float
        Set to np.ones(n_features).

    n_features_in_ : int
        Number of features seen during :term:`fit`.

        .. versionadded:: 0.24

    feature_names_in_ : ndarray of shape (`n_features_in_`,)
        Names of features seen during :term:`fit`. Defined only when `X`
        has feature names that are all strings.

        .. versionadded:: 1.0

    See Also
    --------
    BayesianRidge : Bayesian ridge regression.

    Notes
    -----
    For an example, see :ref:`examples/linear_model/plot_ard.py
    <sphx_glr_auto_examples_linear_model_plot_ard.py>`.

    References
    ----------
    D. J. C. MacKay, Bayesian nonlinear modeling for the prediction
    competition, ASHRAE Transactions, 1994.

    R. Salakhutdinov, Lecture notes on Statistical Machine Learning,
    http://www.utstat.toronto.edu/~rsalakhu/sta4273/notes/Lecture2.pdf#page=15
    Their beta is our ``self.alpha_``
    Their alpha is our ``self.lambda_``
    ARD is a little different than the slide: only dimensions/features for
    which ``self.lambda_ < self.threshold_lambda`` are kept and the rest are
    discarded.

    Examples
    --------
    >>> from sklearn import linear_model
    >>> clf = linear_model.ARDRegression()
    >>> clf.fit([[0,0], [1, 1], [2, 2]], [0, 1, 2])
    ARDRegression()
    >>> clf.predict([[1, 1]])
    array([1.])
    r   Nr   r   r   r   r   )r   r   r   r    r!   r"   r%   threshold_lambdar&   r'   r   r(   r)   r*   r+   Fg     @Tc                    || _         || _        |	| _        || _        || _        || _        || _        || _        || _        |
| _	        || _
        d S r-   )r   r   r&   r   r    r!   r"   r%   r   r'   r   )r.   r   r   r   r    r!   r"   r%   r   r&   r'   r   s               r/   r0   zARDRegression.__init__G  sX     !*  * 0r1   r2   c           	      F   t          | ||t          j        t          j        gddd          \  }}|j        }|j        \  }}t          j        ||          }t          ||| j        | j	                  \  }}}}}	|| _
        |	| _        t          j        |t                    }
| j        }| j        }| j        }| j        }| j        }t          j        t          j                  j        }t          j        dt          j        |          |z   z  |          }t          j        ||          }t/                      | _        d}d }||k    r| j        n| j        }t7          | j                  D ]} |||||
          } ||||||
|          }t          j        |t          j        ||          z
  dz            }d||
         t          j        |          z  z
  }|d	|z  z   ||
         dz  d	|z  z   z  ||
<   ||                                z
  d	|z  z   |d	|z  z   z  }|| j         k     }
d
||
 <   | j!        r|t          j"        |          z  ||z  z
                                  }||tE          |          z  ||z  z
  z  }|dtG          |          |tE          |          z  z   t          j        t          j"        |                    z   z  z  }|d||z  ||dz  z                                  z   z  z  }| j        $                    |           |d
k    rHt          j        t          j%        ||z
                      | j&        k     r|rtO          d|z              n,t          j(        |          }|
)                                s n|dz   | _*        |
)                                r |||||
          } ||||||
|          }n(t          j+        g           ,                    d
d
          }|| _-        || _.        || _/        || _0        | 1                    |||	           | S )a0  Fit the model according to the given training data and parameters.

        Iterative procedure to maximize the evidence

        Parameters
        ----------
        X : array-like of shape (n_samples, n_features)
            Training vector, where `n_samples` is the number of samples and
            `n_features` is the number of features.
        y : array-like of shape (n_samples,)
            Target values (integers). Will be cast to X's dtype if necessary.

        Returns
        -------
        self : object
            Fitted estimator.
        Tr   )r5   r6   r7   ensure_min_samplesr8   )r&   r9   r;   Nc                 t    |t           j                            || d d |f         j        |g          z  ||<   |S r-   )r=   r   rz   rJ   )r\   r]   rW   rU   keep_lambdarZ   s         r/   update_coeffz'ARDRegression.fit.<locals>.update_coeff  sF    !'")*=*=111k>*,a0+ + "E+ Lr1   g       @r   r|   zConverged after %s iterationsr   )2r   r=   r>   r?   r5   rB   zerosr   r&   r'   r@   rA   onesboolr!   r"   r   r    r   rC   rD   rF   rE   rG   rH   _update_sigma_update_sigma_woodburyrL   r   rP   rI   diagr   r%   r   r   rO   rQ   r   rR   r9   anyrT   rX   reshaperW   rU   rZ   rV   r[   )r.   r\   r]   r5   r`   ra   rW   r@   r^   rA   r   r!   r"   r   r    r   rD   rU   rV   rb   r   update_sigmarh   rZ   ri   rk   rj   s                              r/   rm   zARDRegression.fitb  sq   & :rz*  
 
 
1  !	:5111/?q 20
 0
 0
,1iH #  gj555==,,, hrz""& C26!99s?35AAA'*E222vv		 	 	 J&& , 	 4=)) &	 &	E!\!VWkBBF LAufk6JJE FAq% 0 00Q677E7;/"'&//AAF$*S8^$;{#)C(N:$GK   &**,,.w>g%F
 "D$99K"#E;, ! 	'w/(W2DDIIKKWs6{{*Wv-===S''#f++-.fRVG__--. 
 SFUNgq.@-E-E-G-GGHH##A&&& qyyRVBF9u+<$=$=>>II C9EABBBI??$$  qy?? 	0!\!VWkBBF LAufk6JJEEXb\\))!Q//F
Iy(;;;r1   c                    |j         d         }|d d |f         }d||                             dd          z  }t          t          j        ||j                  |z  t          j        ||z  |j                  z             }t          j        |||z            }t          j        |                    dd          |j        z  |           }|t          j        |j         d                   xx         d||         z  z  cc<   |S )Nr   r   r8   r;   )	rB   r   r   r=   eyer5   rI   rJ   diag_indices)	r.   r\   rU   rV   r   r`   X_keep
inv_lambdarZ   s	            r/   r   z$ARDRegression._update_sigma_woodbury  s    GAJ	111k>"-55a<<<
F9AG,,,v5fVj(&(334
 
  344&++B22VX=vFFFrv|A//000C'+:N4NN000r1   c                     |d d |f         }t          j        |j        |          }t          j        |j        d         |j                  }||         |z  ||z  z   }t          |          }	|	S )Nr   r8   )r=   rI   rJ   r   rB   r5   r   )
r.   r\   rU   rV   r   r   gramr   	sigma_invrZ   s
             r/   r   zARDRegression._update_sigma  so     111k>"vfh''fTZ]!'222K(3.$>	y!!r1   c                 $   |                      |          }|du r|S | j        | j        k     }t          ||d          }t	          j        || j                  |z                      d          }t	          j        |d| j	        z  z             }||fS )ro   Fr   )indicesrq   rp   r;   )
rr   rV   r   r   r=   rI   rZ   rP   rs   rU   )r.   r\   rt   ru   	col_indexrv   rw   s          r/   rx   zARDRegression.predict  s    , ((++Mt'<<Iq)!<<<A#%6!T[#9#9A#="B"B"B"J"JG/33DEFFE5= r1   r   )r   r   r   r   r   r   r   r(   r   r   r0   r
   rm   r   r   rx   r   r1   r/   r   r     s        @ @F Xh4???@q$v6667HT1d6:::;HT1d6:::;XdAtF;;;<XdAtF;;;<#%XdAtFCCCD#+;$ $D   "     6 \555} } 65}~  $	 	 	! ! ! ! ! !r1   r   )r   mathr   numbersr   r   numpyr=   scipyr   scipy.linalgr   baser	   r
   utilsr   utils._param_validationr   utils.extmathr   utils.validationr   r   _baser   r   r   r   r   r   r1   r/   <module>r      sd          " " " " " " " "                 / / / / / / / / " " " " " " . . . . . . ' ' ' ' ' ' B B B B B B B B ? ? ? ? ? ? ? ? ? ?U U U U UNK U U Uxg! g! g! g! g!NK g! g! g! g! g!r1   