
    0PhG                     J    d Z ddlZddlmZ ddlmZ d Z G d d          ZdS )	zA
Loss functions for linear models with raw_prediction = X @ coef
    N)sparse   )squared_normc                     | j         d         }t          j        |           r7| j        t          j        |df||f          z  | z                                  S |dddf         | z  }| j        |z  S )z/Compute the sandwich product X.T @ diag(W) @ X.r   shapeN)r   r   issparseT
dia_matrixtoarray)XW	n_samplesWXs       a/var/www/html/test/jupyter/venv/lib/python3.11/site-packages/sklearn/linear_model/_linear_loss.pysandwich_dotr      sy     
Iq C&#QF9i2HIIIIAM
'))	 qqq$wZ!^sRx    c                       e Zd ZdZd ZddZd Zd Zd Z	 	 	 	 dd
Z		 	 	 	 ddZ
	 	 	 	 ddZ	 	 	 	 	 	 ddZ	 ddZdS )LinearModelLossa
	  General class for loss functions with raw_prediction = X @ coef + intercept.

    Note that raw_prediction is also known as linear predictor.

    The loss is the average of per sample losses and includes a term for L2
    regularization::

        loss = 1 / s_sum * sum_i s_i loss(y_i, X_i @ coef + intercept)
               + 1/2 * l2_reg_strength * ||coef||_2^2

    with sample weights s_i=1 if sample_weight=None and s_sum=sum_i s_i.

    Gradient and hessian, for simplicity without intercept, are::

        gradient = 1 / s_sum * X.T @ loss.gradient + l2_reg_strength * coef
        hessian = 1 / s_sum * X.T @ diag(loss.hessian) @ X
                  + l2_reg_strength * identity

    Conventions:
        if fit_intercept:
            n_dof =  n_features + 1
        else:
            n_dof = n_features

        if base_loss.is_multiclass:
            coef.shape = (n_classes, n_dof) or ravelled (n_classes * n_dof,)
        else:
            coef.shape = (n_dof,)

        The intercept term is at the end of the coef array:
        if base_loss.is_multiclass:
            if coef.shape (n_classes, n_dof):
                intercept = coef[:, -1]
            if coef.shape (n_classes * n_dof,)
                intercept = coef[n_features::n_dof] = coef[(n_dof-1)::n_dof]
            intercept.shape = (n_classes,)
        else:
            intercept = coef[-1]

        Shape of gradient follows shape of coef.
        gradient.shape = coef.shape

        But hessian (to make our lives simpler) are always 2-d:
        if base_loss.is_multiclass:
            hessian.shape = (n_classes * n_dof, n_classes * n_dof)
        else:
            hessian.shape = (n_dof, n_dof)

    Note: If coef has shape (n_classes * n_dof,), the 2d-array can be reconstructed as

        coef.reshape((n_classes, -1), order="F")

    The option order="F" makes coef[:, i] contiguous. This, in turn, makes the
    coefficients without intercept, coef[:, :-1], contiguous and speeds up
    matrix-vector computations.

    Note: If the average loss per sample is wanted instead of the sum of the loss per
    sample, one can simply use a rescaled sample_weight such that
    sum(sample_weight) = 1.

    Parameters
    ----------
    base_loss : instance of class BaseLoss from sklearn._loss.
    fit_intercept : bool
    c                 "    || _         || _        d S N)	base_lossfit_intercept)selfr   r   s      r   __init__zLinearModelLoss.__init__h   s    "*r   Nc                     |j         d         }| j        j        }| j        r|dz   }n|}| j        j        rt          j        |||f|d          }nt          j        |||          }|S )a  Allocate coef of correct shape with zeros.

        Parameters:
        -----------
        X : {array-like, sparse matrix} of shape (n_samples, n_features)
            Training data.
        dtype : data-type, default=None
            Overrides the data type of coef. With dtype=None, coef will have the same
            dtype as X.

        Returns
        -------
        coef : ndarray of shape (n_dof,) or (n_classes, n_dof)
            Coefficients of a linear model.
           F)r   dtypeorder)r   r   )r   r   	n_classesr   is_multiclassnp
zeros_like)r   r   r   
n_featuresr!   n_dofcoefs          r   init_zero_coefzLinearModelLoss.init_zero_coefl   s|      WQZ
N,	 	NEEE>' 	>=9e*<EQTUUUDD=%u===Dr   c                 
   | j         j        s| j        r|d         }|dd         }nZd}|}nU|j        dk    r$|                    | j         j        dfd          }n|}| j        r|dddf         }|ddddf         }nd}||fS )a  Helper function to get coefficients and intercept.

        Parameters
        ----------
        coef : ndarray of shape (n_dof,), (n_classes, n_dof) or (n_classes * n_dof,)
            Coefficients of a linear model.
            If shape (n_classes * n_dof,), the classes of one feature are contiguous,
            i.e. one reconstructs the 2d-array via
            coef.reshape((n_classes, -1), order="F").

        Returns
        -------
        weights : ndarray of shape (n_features,) or (n_classes, n_features)
            Coefficients without intercept term.
        intercept : float or ndarray of shape (n_classes,)
            Intercept terms.
        N        r   r   r    )r   r"   r   ndimreshaper!   )r   r'   	interceptweightss       r   weight_interceptz LinearModelLoss.weight_intercept   s    $ ~+ 	 !  H	ss)	 yA~~,,(@"'ES,QQ!  #AAArEN	!!!!SbS&/		!!r   c                     |                      |          \  }}| j        j        s	||z  |z   }n||j        z  |z   }|||fS )ai  Helper function to get coefficients, intercept and raw_prediction.

        Parameters
        ----------
        coef : ndarray of shape (n_dof,), (n_classes, n_dof) or (n_classes * n_dof,)
            Coefficients of a linear model.
            If shape (n_classes * n_dof,), the classes of one feature are contiguous,
            i.e. one reconstructs the 2d-array via
            coef.reshape((n_classes, -1), order="F").
        X : {array-like, sparse matrix} of shape (n_samples, n_features)
            Training data.

        Returns
        -------
        weights : ndarray of shape (n_features,) or (n_classes, n_features)
            Coefficients without intercept term.
        intercept : float or ndarray of shape (n_classes,)
            Intercept terms.
        raw_prediction : ndarray of shape (n_samples,) or             (n_samples, n_classes)
        )r1   r   r"   r
   )r   r'   r   r0   r/   raw_predictions         r   weight_intercept_rawz$LinearModelLoss.weight_intercept_raw   sX    , "22488~+ 	7[94NN ]Y6N	>11r   c                 P    |j         dk    r||z  nt          |          }d|z  |z  S )z5Compute L2 penalty term l2_reg_strength/2 *||w||_2^2.r   g      ?)r-   r   )r   r0   l2_reg_strengthnorm2_ws       r   
l2_penaltyzLinearModelLoss.l2_penalty   s5    '.|q'8'8'G##l7>S>S_$w..r   r+   r   c                    ||                      ||          \  }}	}n|                     |          \  }}	| j                            ||d|          }
t	          j        |
|          }
|
|                     ||          z   S )a  Compute the loss as weighted average over point-wise losses.

        Parameters
        ----------
        coef : ndarray of shape (n_dof,), (n_classes, n_dof) or (n_classes * n_dof,)
            Coefficients of a linear model.
            If shape (n_classes * n_dof,), the classes of one feature are contiguous,
            i.e. one reconstructs the 2d-array via
            coef.reshape((n_classes, -1), order="F").
        X : {array-like, sparse matrix} of shape (n_samples, n_features)
            Training data.
        y : contiguous array of shape (n_samples,)
            Observed, true target values.
        sample_weight : None or contiguous array of shape (n_samples,), default=None
            Sample weights.
        l2_reg_strength : float, default=0.0
            L2 regularization strength
        n_threads : int, default=1
            Number of OpenMP threads to use.
        raw_prediction : C-contiguous array of shape (n_samples,) or array of             shape (n_samples, n_classes)
            Raw prediction values (in link space). If provided, these are used. If
            None, then raw_prediction = X @ coef + intercept is calculated.

        Returns
        -------
        loss : float
            Weighted average of losses per sample, plus penalty.
        Ny_truer3   sample_weight	n_threadsr0   )r4   r1   r   lossr#   averager8   )r   r'   r   yr<   r6   r=   r3   r0   r/   r?   s              r   r?   zLinearModelLoss.loss   s    N !151J1J4QR1S1S.GY!%!6!6t!<!<GY~"")	 # 
 
 z$666doog????r   c                 Z   |j         | j        j        c\  }}	}
|	t          | j                  z   }||                     ||          \  }}}n|                     |          \  }}| j                            ||||          \  }}||nt          j	        |          }|	                                |z  }|| 
                    ||          z  }||z  }| j        j        sOt          j        ||j                  }|j        |z  ||z  z   |d|	<   | j        r|	                                |d<   n|t          j        |
|f|j        d          }|j        |z  ||z  z   |ddd|	f<   | j        r|	                    d          |dddf<   |j        d	k    r|                    d
          }||fS )a\  Computes the sum of loss and gradient w.r.t. coef.

        Parameters
        ----------
        coef : ndarray of shape (n_dof,), (n_classes, n_dof) or (n_classes * n_dof,)
            Coefficients of a linear model.
            If shape (n_classes * n_dof,), the classes of one feature are contiguous,
            i.e. one reconstructs the 2d-array via
            coef.reshape((n_classes, -1), order="F").
        X : {array-like, sparse matrix} of shape (n_samples, n_features)
            Training data.
        y : contiguous array of shape (n_samples,)
            Observed, true target values.
        sample_weight : None or contiguous array of shape (n_samples,), default=None
            Sample weights.
        l2_reg_strength : float, default=0.0
            L2 regularization strength
        n_threads : int, default=1
            Number of OpenMP threads to use.
        raw_prediction : C-contiguous array of shape (n_samples,) or array of             shape (n_samples, n_classes)
            Raw prediction values (in link space). If provided, these are used. If
            None, then raw_prediction = X @ coef + intercept is calculated.

        Returns
        -------
        loss : float
            Weighted average of losses per sample, plus penalty.

        gradient : ndarray of shape coef.shape
             The gradient of the loss.
        Nr:   r   r*   r   r   r    r   axisr   r,   )r   r   r!   intr   r4   r1   loss_gradientr#   sumr8   r"   
empty_liker   r
   emptyr-   ravel)r   r'   r   rA   r<   r6   r=   r3   r   r%   r!   r&   r0   r/   r?   grad_pointwisesw_sumgrads                     r   rH   zLinearModelLoss.loss_gradient
  s   T ./Wdn6N*JS!3444!151J1J4QR1S1S.GY!%!6!6t!<!<GY#~;;)'	  <  
  
n ,39N9NxxzzF"999& ~+ 	-=W];;;D !n 47P PD*! 0)--//R8Y.gm3OOOD#1#3a#7/G:S#SDKZK ! 9,00a088QQQUyA~~zzz,,Tzr   c                    |j         | j        j        c\  }}	}
|	t          | j                  z   }||                     ||          \  }}}n|                     |          \  }}| j                            ||||          }||nt          j	        |          }||z  }| j        j
        sPt          j        ||j                  }|j        |z  ||z  z   |d|	<   | j        r|	                                |d<   |S t          j        |
|f|j        d          }|j        |z  ||z  z   |ddd|	f<   | j        r|	                    d          |dddf<   |j        d	k    r|                    d
          S |S )a  Computes the gradient w.r.t. coef.

        Parameters
        ----------
        coef : ndarray of shape (n_dof,), (n_classes, n_dof) or (n_classes * n_dof,)
            Coefficients of a linear model.
            If shape (n_classes * n_dof,), the classes of one feature are contiguous,
            i.e. one reconstructs the 2d-array via
            coef.reshape((n_classes, -1), order="F").
        X : {array-like, sparse matrix} of shape (n_samples, n_features)
            Training data.
        y : contiguous array of shape (n_samples,)
            Observed, true target values.
        sample_weight : None or contiguous array of shape (n_samples,), default=None
            Sample weights.
        l2_reg_strength : float, default=0.0
            L2 regularization strength
        n_threads : int, default=1
            Number of OpenMP threads to use.
        raw_prediction : C-contiguous array of shape (n_samples,) or array of             shape (n_samples, n_classes)
            Raw prediction values (in link space). If provided, these are used. If
            None, then raw_prediction = X @ coef + intercept is calculated.

        Returns
        -------
        gradient : ndarray of shape coef.shape
             The gradient of the loss.
        Nr:   rC   r*   r   rD   r   rE   r   r,   )r   r   r!   rG   r   r4   r1   gradientr#   rI   r"   rJ   r   r
   rK   r-   rL   )r   r'   r   rA   r<   r6   r=   r3   r   r%   r!   r&   r0   r/   rM   rN   rO   s                    r   rQ   zLinearModelLoss.gradientX  s   N ./Wdn6N*JS!3444!151J1J4QR1S1S.GY!%!6!6t!<!<GY00)'	 1 
 
 ,39N9N& ~+ 	=W];;;D !n 47P PD*! 0)--//RK8Y.gm3OOOD#1#3a#7/G:S#SDKZK ! 9,00a088QQQUyA~~zzz,,,r   c
                 `
   |j         | j        j        c\  }
}}|t          | j                  z   }|	|                     ||          \  }}}	n|                     |          \  }}||
nt          j        |          }|t          j	        ||j
        d          }nY|j         |j         k    r t          d|j          d|j          d          | j        j        r|j        j        st          d          |}|j        }|t          j        ||f|j
                  }n_|j         ||fk    rt          d	||f d
|j         d          | j        j        r'|j        j        s|j        j        st          d          |}| j        j        s0| j                            ||	||          \  }}||z  }||z  }t          j        |dk    |          dk    }t          j        |          }|j        |z  ||z  z   |d|<   | j        r|                                |d<   |r|||fS t-          ||          |d|d|f<   |dk    r>|j        j        rdnd}|                    d|          d||z  |dz   xx         |z  cc<   | j        r3|j        |z  }||dddf<   ||dddf<   |                                |d<   nt| j                            ||	||          \  }}||z  }|                    ||fd          }|j        |z  ||z  z   |ddd|f<   | j        r|                    d          |dddf<   |j        dk    r|                    d          }|||z  }nd|z  }t7          |          D ]j}|dd|f         d|dd|f         z
  z  |z  }t-          ||          ||||z  ||||z  |f<   | j        rU|j        |z  }|||||z  |||z  |z   f<   ||||z  |z   |||z  |f<   |                                |||z  |z   ||z  |z   f<   t7          |dz   |          D ]}|dd|f          |dd|f         z  |z  }t-          ||          ||||z  ||||z  |f<   | j        rU|j        |z  }|||||z  |||z  |z   f<   ||||z  |z   |||z  |f<   |                                |||z  |z   ||z  |z   f<   ||d||d|f         ||d||d|f<   l|dk    rG|j        j        rdnd}|                    d|          d|dz  |z  |z  ||z  dz   xx         |z  cc<   d}|||fS )a~  Computes gradient and hessian w.r.t. coef.

        Parameters
        ----------
        coef : ndarray of shape (n_dof,), (n_classes, n_dof) or (n_classes * n_dof,)
            Coefficients of a linear model.
            If shape (n_classes * n_dof,), the classes of one feature are contiguous,
            i.e. one reconstructs the 2d-array via
            coef.reshape((n_classes, -1), order="F").
        X : {array-like, sparse matrix} of shape (n_samples, n_features)
            Training data.
        y : contiguous array of shape (n_samples,)
            Observed, true target values.
        sample_weight : None or contiguous array of shape (n_samples,), default=None
            Sample weights.
        l2_reg_strength : float, default=0.0
            L2 regularization strength
        n_threads : int, default=1
            Number of OpenMP threads to use.
        gradient_out : None or ndarray of shape coef.shape
            A location into which the gradient is stored. If None, a new array
            might be created.
        hessian_out : None or ndarray of shape (n_dof, n_dof) or             (n_classes * n_dof, n_classes * n_dof)
            A location into which the hessian is stored. If None, a new array
            might be created.
        raw_prediction : C-contiguous array of shape (n_samples,) or array of             shape (n_samples, n_classes)
            Raw prediction values (in link space). If provided, these are used. If
            None, then raw_prediction = X @ coef + intercept is calculated.

        Returns
        -------
        gradient : ndarray of shape coef.shape
             The gradient of the loss.

        hessian : ndarray of shape (n_dof, n_dof) or             (n_classes, n_dof, n_dof, n_classes)
            Hessian matrix.

        hessian_warning : bool
            True if pointwise hessian has more than 25% of its elements non-positive.
        Nr   rD   z4gradient_out is required to have shape coef.shape = z; got .z"gradient_out must be F-contiguous.rC   z'hessian_out is required to have shape (z); got hessian_out.shape=zhessian_out must be contiguous.r:   r   r>   g      ?r*   Cr,   r   )r*   r*   rE   g      ?r   F)r   r   r!   rG   r   r4   r1   r#   rI   rJ   r   
ValueErrorr"   flagsf_contiguoussizerK   c_contiguousgradient_hessianr@   absr
   r   r.   gradient_probar-   rL   range)r   r'   r   rA   r<   r6   r=   gradient_outhessian_outr3   r   r%   r!   r&   r0   r/   rN   rO   nhessrM   hess_pointwisehessian_warningr    Xhprobaswkhls                                 r   rZ   z LinearModelLoss.gradient_hessian  s   n ./Wdn6N*JS!3444!151J1J4QR1S1S.GY!%!6!6t!<!<GY+39N9N =W]#FFFDD4:---tz - -#)- - -   ^) 	 ,2D2Q 	 ABBBDI8QF'-888DD1a&(()!Q ) )$) ) )   ^) 	!.	7B7H7U	 >???D~+ b	$-1^-L-L-+#	 .M . .*NN f$Nf$N 
>Q.FFFM   VN33N !n 47P PD*! 0)--//R 3T?22-9!^-L-LD*kzk)*""  $z6?CRu--8zE)eai8  $%    ! 
4 S>) "SbS"W "R"W-1133V %)N$A$A-+#	 %B % %!NE f$N<<E 2#<>>D#1#3a#7/G:S#SDKZK ! 9,00a088QQQUyA~~zzz,,L ("V+6\9%% ,X ,X !!!Q$K1uQQQT{?3b8 !A&& 	J.:	J.:< % qB  I
2Y>!J.24  !J.2I
2Y>@
  Z/!3Y5Ka5OOP q1ui00 X XAqqq!tuQQQT{2R7A %Q** I
2Y>I
2Y>@ ) S1W  	J 6B%
2Q68  %
2Q6	J 6BD
 EEGG Y3a7Z9ORS9SST 8<ALyL!,Y,<V7WDIq|)|344+X. ""#z6?CRu--Sy!|j058Y=NQR=RS  $%   
 $OT?**r   c                     j          j        j        c\  }t           j                  z                                  \  }}	|nt          j                   j        j        s9 j        	                    ||	|          \  }
}|
z  }
|z  }t          j
        j                  }j        |
z  z  z   |d<    j        r|
                                |d<   |                                t          j                  rt          j        |df||f          z  n|ddt          j        f         z   j        rNt          j        t          j                            d                              t          j                   fd}n j                            ||	|          \  }
|
z  }
t          j        fj        d	
          }|
j        z  z  z   |dddf<    j        r|
                    d          |dddf<    fd}j        dk    r|                    d	          |fS ||fS )a  Computes gradient and hessp (hessian product function) w.r.t. coef.

        Parameters
        ----------
        coef : ndarray of shape (n_dof,), (n_classes, n_dof) or (n_classes * n_dof,)
            Coefficients of a linear model.
            If shape (n_classes * n_dof,), the classes of one feature are contiguous,
            i.e. one reconstructs the 2d-array via
            coef.reshape((n_classes, -1), order="F").
        X : {array-like, sparse matrix} of shape (n_samples, n_features)
            Training data.
        y : contiguous array of shape (n_samples,)
            Observed, true target values.
        sample_weight : None or contiguous array of shape (n_samples,), default=None
            Sample weights.
        l2_reg_strength : float, default=0.0
            L2 regularization strength
        n_threads : int, default=1
            Number of OpenMP threads to use.

        Returns
        -------
        gradient : ndarray of shape coef.shape
             The gradient of the loss.

        hessp : callable
            Function that takes in a vector input of shape of gradient and
            and returns matrix-vector product with hessian.
        Nr:   rC   r*   r   r   rE   c                    t          j        |           }t          j                  rj        | d          z  z  |d <   n4t           j                            j        | d          g          |d <   |d xx         | d          z  z  cc<   j        r7|d xx         | d         z  z  cc<   | d          z  | d         z  z   |d<   |S )Nr*   )r#   rJ   r   r	   r
   linalg	multi_dotr   )	sretr   hXhX_sumhessian_sumr6   r%   r   s	     r   hesspz7LinearModelLoss.gradient_hessian_product.<locals>.hessp  s   mA&&?1%% V'(sb1[j[>.A'BC$$')y':':ACQ{
{^;T'U'UC$KZK   Oan$DD   % L$$$"6$$$$q*~5ae8KKCG
r   r   rD   c                 B   |                      dfd          } j        r| d d df         }| d d d df         } nd}| j        z  |z   }|
 |z                      d          d d t          j        f         z  }|
z  }|d d t          j        f         z  }t	          j        fj        d          }|j        z  z  | z  z   |d d d 	f<   j        r |                    d          z  |d d df<   j        dk    r|	                    d          S |S )Nr*   r   r,   r   r   rE   rD   )
r.   r   r
   rI   r#   newaxisrK   r   r-   rL   )rn   s_intercepttmp	hess_prodr   r'   r6   r!   r&   r%   re   r<   r   rN   r0   s       r   rs   z7LinearModelLoss.gradient_hessian_product.<locals>.hessp  s\   IIy"oSI99% $"#AAArE(K!!!SbS&	AA"#K!#g+))q)11!!!RZ-@@u ,=BJ77C Hi%7w}TWXXX	-0UQY&,@?UVCV,V	!!![j[.)% @'*wwAw'?Iaaae$9>>$???555$$r   r   r,   )r   r   r!   rG   r   r4   r#   rI   r"   rZ   rJ   r   r
   r   r	   r   ru   squeezeasarray
atleast_1dr\   rK   r-   rL   )r   r'   r   rA   r<   r6   r=   r   r/   r3   rM   rb   rO   rs   rp   rq   rr   r!   r&   r%   re   rN   r0   s   ``` ``        @@@@@@@@@r   gradient_hessian_productz(LinearModelLoss.gradient_hessian_product  s-   @ ./Wdn6N*JS!3444-1-F-FtQ-O-O*N+39N9N~+ p	4-1^-L-L-+#	 .M . .*NN f$Nf$N=W];;;D !n 47P PD*! 0)--//R ),,..Kq!! 7%~q&9)YAWXXX 
 $AAArzM2Q6! / BJrvv1v~~$>$>??v..           & %)N$A$A-+#	 %B % %!NE f$N8Y.gm3OOOD#1#3a#7/G:S#SDKZK ! 9,00a088QQQU.% % % % % % % % % % % % % % %. yA~~zzz,,e33U{r   r   )Nr+   r   N)Nr+   r   NNN)Nr+   r   )__name__
__module____qualname____doc__r   r(   r1   r4   r8   r?   rH   rQ   rZ   r|    r   r   r   r   %   s6       @ @D+ + +   8%" %" %"N2 2 2@/ / / 4@ 4@ 4@ 4@v L L L Lf G G G G\ + + + +D NOW W W W W Wr   r   )	r   numpyr#   scipyr   utils.extmathr   r   r   r   r   r   <module>r      s               ( ( ( ( ( (  .T T T T T T T T T Tr   