
    0Ph@X                         d Z ddlZddlmZ ddlmZ ddlm	Z	m
Z
 ddlmZ ddlmZ dd	lmZ dd
lmZ d Zd Zd Zd ZddZdddZd Zd Zd Zd Zd Zd Zd dZ d!dZ!d Z"d Z#d Z$d Z%dS )"zBA collection of utilities to work with sparse matrices and arrays.    N)LinearOperator   )_sparse_min_max_sparse_nan_min_max)_check_sample_weight   )csc_mean_variance_axis0)csr_mean_variance_axis0)incr_mean_variance_axis0c                 ~    t          j        |           r| j        nt          |           }d|z  }t	          |          )z2Raises a TypeError if X is not a CSR or CSC matrixz,Expected a CSR or CSC sparse matrix, got %s.)spissparseformattype	TypeError)X
input_typeerrs      Y/var/www/html/test/jupyter/venv/lib/python3.11/site-packages/sklearn/utils/sparsefuncs.py_raise_typeerrorr      s6    [^^8aJ
8:
EC
C..    c                 2    | dvrt          d| z            d S )N)r   r   z8Unknown axis value: %d. Use 0 for rows, or 1 for columns)
ValueErroraxiss    r   _raise_error_wrong_axisr      s/    6FM
 
 	
 r   c                     |j         d         | j         d         k    sJ | xj        |                    | j        d          z  c_        dS )a  Inplace column scaling of a CSR matrix.

    Scale each feature of the data matrix by multiplying with specific scale
    provided by the caller assuming a (n_samples, n_features) shape.

    Parameters
    ----------
    X : sparse matrix of shape (n_samples, n_features)
        Matrix to normalize using the variance of the features.
        It should be of CSR format.

    scale : ndarray of shape (n_features,), dtype={np.float32, np.float64}
        Array of precomputed feature-wise values to use for scaling.

    Examples
    --------
    >>> from sklearn.utils import sparsefuncs
    >>> from scipy import sparse
    >>> import numpy as np
    >>> indptr = np.array([0, 3, 4, 4, 4])
    >>> indices = np.array([0, 1, 2, 2])
    >>> data = np.array([8, 1, 2, 5])
    >>> scale = np.array([2, 3, 2])
    >>> csr = sparse.csr_matrix((data, indices, indptr))
    >>> csr.todense()
    matrix([[8, 1, 2],
            [0, 0, 5],
            [0, 0, 0],
            [0, 0, 0]])
    >>> sparsefuncs.inplace_csr_column_scale(csr, scale)
    >>> csr.todense()
    matrix([[16,  3,  4],
            [ 0,  0, 10],
            [ 0,  0,  0],
            [ 0,  0,  0]])
    r   r   clip)modeN)shapedatatakeindicesr   scales     r   inplace_csr_column_scaler&   %   sH    J ;q>QWQZ''''FFejjj000FFFFr   c                     |j         d         | j         d         k    sJ | xj        t          j        |t          j        | j                            z  c_        dS )a  Inplace row scaling of a CSR matrix.

    Scale each sample of the data matrix by multiplying with specific scale
    provided by the caller assuming a (n_samples, n_features) shape.

    Parameters
    ----------
    X : sparse matrix of shape (n_samples, n_features)
        Matrix to be scaled. It should be of CSR format.

    scale : ndarray of float of shape (n_samples,)
        Array of precomputed sample-wise values to use for scaling.
    r   N)r    r!   nprepeatdiffindptrr$   s     r   inplace_csr_row_scaler,   N   sM     ;q>QWQZ''''FFbirwqx00111FFFFr   Fc                 z   t          |           t          j        |           r:| j        dk    r/|dk    rt	          | ||          S t          | j        ||          S t          j        |           r:| j        dk    r/|dk    rt          | ||          S t	          | j        ||          S t          |            dS )a{  Compute mean and variance along an axis on a CSR or CSC matrix.

    Parameters
    ----------
    X : sparse matrix of shape (n_samples, n_features)
        Input data. It can be of CSR or CSC format.

    axis : {0, 1}
        Axis along which the axis should be computed.

    weights : ndarray of shape (n_samples,) or (n_features,), default=None
        If axis is set to 0 shape is (n_samples,) or
        if axis is set to 1 shape is (n_features,).
        If it is set to None, then samples are equally weighted.

        .. versionadded:: 0.24

    return_sum_weights : bool, default=False
        If True, returns the sum of weights seen for each feature
        if `axis=0` or each sample if `axis=1`.

        .. versionadded:: 0.24

    Returns
    -------

    means : ndarray of shape (n_features,), dtype=floating
        Feature-wise means.

    variances : ndarray of shape (n_features,), dtype=floating
        Feature-wise variances.

    sum_weights : ndarray of shape (n_features,), dtype=floating
        Returned if `return_sum_weights` is `True`.

    Examples
    --------
    >>> from sklearn.utils import sparsefuncs
    >>> from scipy import sparse
    >>> import numpy as np
    >>> indptr = np.array([0, 3, 4, 4, 4])
    >>> indices = np.array([0, 1, 2, 2])
    >>> data = np.array([8, 1, 2, 5])
    >>> scale = np.array([2, 3, 2])
    >>> csr = sparse.csr_matrix((data, indices, indptr))
    >>> csr.todense()
    matrix([[8, 1, 2],
            [0, 0, 5],
            [0, 0, 0],
            [0, 0, 0]])
    >>> sparsefuncs.mean_variance_axis(csr, axis=0)
    (array([2.  , 0.25, 1.75]), array([12.    ,  0.1875,  4.1875]))
    csrr   )weightsreturn_sum_weightscscN)r   r   r   r   _csr_mean_var_axis0_csc_mean_var_axis0Tr   )r   r   r/   r0   s       r   mean_variance_axisr5   `   s    l D!!!	{1~~ !(e++199&77I    'W9K    
Q 
AH--199&77I    'W9K    	r   )r/   c                p   t          |           t          j        |           r	| j        dv st	          |            t          j        |          dk    r!t          j        |j        ||j	                  }t          j        |          t          j        |          cxk    rt          j        |          k    sn t          d          |dk    rWt          j        |          | j        d         k    r3t          d| j        d          dt          j        |           d          nVt          j        |          | j        d         k    r3t          d	| j        d          dt          j        |           d          |dk    r| j        n| } |t          || | j	                  }t          | ||||          S )a  Compute incremental mean and variance along an axis on a CSR or CSC matrix.

    last_mean, last_var are the statistics computed at the last step by this
    function. Both must be initialized to 0-arrays of the proper size, i.e.
    the number of features in X. last_n is the number of samples encountered
    until now.

    Parameters
    ----------
    X : CSR or CSC sparse matrix of shape (n_samples, n_features)
        Input data.

    axis : {0, 1}
        Axis along which the axis should be computed.

    last_mean : ndarray of shape (n_features,) or (n_samples,), dtype=floating
        Array of means to update with the new data X.
        Should be of shape (n_features,) if axis=0 or (n_samples,) if axis=1.

    last_var : ndarray of shape (n_features,) or (n_samples,), dtype=floating
        Array of variances to update with the new data X.
        Should be of shape (n_features,) if axis=0 or (n_samples,) if axis=1.

    last_n : float or ndarray of shape (n_features,) or (n_samples,),             dtype=floating
        Sum of the weights seen so far, excluding the current weights
        If not float, it should be of shape (n_features,) if
        axis=0 or (n_samples,) if axis=1. If float it corresponds to
        having same weights for all samples (or features).

    weights : ndarray of shape (n_samples,) or (n_features,), default=None
        If axis is set to 0 shape is (n_samples,) or
        if axis is set to 1 shape is (n_features,).
        If it is set to None, then samples are equally weighted.

        .. versionadded:: 0.24

    Returns
    -------
    means : ndarray of shape (n_features,) or (n_samples,), dtype=floating
        Updated feature-wise means if axis = 0 or
        sample-wise means if axis = 1.

    variances : ndarray of shape (n_features,) or (n_samples,), dtype=floating
        Updated feature-wise variances if axis = 0 or
        sample-wise variances if axis = 1.

    n : ndarray of shape (n_features,) or (n_samples,), dtype=integral
        Updated number of seen samples per feature if axis=0
        or number of seen features per sample if axis=1.

        If weights is not None, n is a sum of the weights of the seen
        samples or features instead of the actual number of seen
        samples or features.

    Notes
    -----
    NaNs are ignored in the algorithm.

    Examples
    --------
    >>> from sklearn.utils import sparsefuncs
    >>> from scipy import sparse
    >>> import numpy as np
    >>> indptr = np.array([0, 3, 4, 4, 4])
    >>> indices = np.array([0, 1, 2, 2])
    >>> data = np.array([8, 1, 2, 5])
    >>> scale = np.array([2, 3, 2])
    >>> csr = sparse.csr_matrix((data, indices, indptr))
    >>> csr.todense()
    matrix([[8, 1, 2],
            [0, 0, 5],
            [0, 0, 0],
            [0, 0, 0]])
    >>> sparsefuncs.incr_mean_variance_axis(
    ...     csr, axis=0, last_mean=np.zeros(3), last_var=np.zeros(3), last_n=2
    ... )
    (array([1.3..., 0.1..., 1.1...]), array([8.8..., 0.1..., 3.4...]),
    array([6., 6., 6.]))
    )r1   r.   r   )dtypez8last_mean, last_var, last_n do not have the same shapes.r   zHIf axis=1, then last_mean, last_n, last_var should be of size n_samples z (Got z).zIIf axis=0, then last_mean, last_n, last_var should be of size n_features N)	last_meanlast_varlast_nr/   )r   r   r   r   r   r(   sizefullr    r7   r   r4   r   _incr_mean_var_axis0)r   r   r8   r9   r:   r/   s         r   incr_mean_variance_axisr>      s   b D!!!KNN qx>99	wv!&	HHHGI"'("3"3FFFFrwvFFFFSTTTqyy79++K"#'!*K K46GI4F4FK K K   , 79++L#$71:L L57WY5G5GL L L  
 qyyaA&wAAA	Y&'   r   c                     t          j        |           r"| j        dk    rt          | j        |           dS t          j        |           r| j        dk    rt          | |           dS t          |            dS )a  Inplace column scaling of a CSC/CSR matrix.

    Scale each feature of the data matrix by multiplying with specific scale
    provided by the caller assuming a (n_samples, n_features) shape.

    Parameters
    ----------
    X : sparse matrix of shape (n_samples, n_features)
        Matrix to normalize using the variance of the features. It should be
        of CSC or CSR format.

    scale : ndarray of shape (n_features,), dtype={np.float32, np.float64}
        Array of precomputed feature-wise values to use for scaling.

    Examples
    --------
    >>> from sklearn.utils import sparsefuncs
    >>> from scipy import sparse
    >>> import numpy as np
    >>> indptr = np.array([0, 3, 4, 4, 4])
    >>> indices = np.array([0, 1, 2, 2])
    >>> data = np.array([8, 1, 2, 5])
    >>> scale = np.array([2, 3, 2])
    >>> csr = sparse.csr_matrix((data, indices, indptr))
    >>> csr.todense()
    matrix([[8, 1, 2],
            [0, 0, 5],
            [0, 0, 0],
            [0, 0, 0]])
    >>> sparsefuncs.inplace_column_scale(csr, scale)
    >>> csr.todense()
    matrix([[16,  3,  4],
            [ 0,  0, 10],
            [ 0,  0,  0],
            [ 0,  0,  0]])
    r1   r.   N)r   r   r   r,   r4   r&   r   r$   s     r   inplace_column_scaler@   !  s    J 
{1~~ !(e++ac5)))))	Q AH-- E*****r   c                     t          j        |           r"| j        dk    rt          | j        |           dS t          j        |           r| j        dk    rt          | |           dS t          |            dS )a  Inplace row scaling of a CSR or CSC matrix.

    Scale each row of the data matrix by multiplying with specific scale
    provided by the caller assuming a (n_samples, n_features) shape.

    Parameters
    ----------
    X : sparse matrix of shape (n_samples, n_features)
        Matrix to be scaled. It should be of CSR or CSC format.

    scale : ndarray of shape (n_features,), dtype={np.float32, np.float64}
        Array of precomputed sample-wise values to use for scaling.

    Examples
    --------
    >>> from sklearn.utils import sparsefuncs
    >>> from scipy import sparse
    >>> import numpy as np
    >>> indptr = np.array([0, 2, 3, 4, 5])
    >>> indices = np.array([0, 1, 2, 3, 3])
    >>> data = np.array([8, 1, 2, 5, 6])
    >>> scale = np.array([2, 3, 4, 5])
    >>> csr = sparse.csr_matrix((data, indices, indptr))
    >>> csr.todense()
    matrix([[8, 1, 0, 0],
            [0, 0, 2, 0],
            [0, 0, 0, 5],
            [0, 0, 0, 6]])
    >>> sparsefuncs.inplace_row_scale(csr, scale)
    >>> csr.todense()
     matrix([[16,  2,  0,  0],
             [ 0,  0,  6,  0],
             [ 0,  0,  0, 20],
             [ 0,  0,  0, 30]])
    r1   r.   N)r   r   r   r&   r4   r,   r   r$   s     r   inplace_row_scalerB   N  s    H 
{1~~ !(e++ e,,,,,	Q AH--a'''''r   c                    ||fD ]+}t          |t          j                  rt          d          ,|dk     r|| j        d         z  }|dk     r|| j        d         z  }| j        |k    }|| j        | j        |k    <   || j        |<   dS )aK  Swap two rows of a CSC matrix in-place.

    Parameters
    ----------
    X : sparse matrix of shape (n_samples, n_features)
        Matrix whose two rows are to be swapped. It should be of
        CSC format.

    m : int
        Index of the row of X to be swapped.

    n : int
        Index of the row of X to be swapped.
     m and n should be valid integersr   N)
isinstancer(   ndarrayr   r    r#   )r   mntm_masks        r   inplace_swap_row_cscrK   z  s     V @ @a$$ 	@>???	@ 	1uu	QWQZ1uu	QWQZY!^F !AIai1nAIfr   c           	      <   ||fD ]+}t          |t          j                  rt          d          ,|dk     r|| j        d         z  }|dk     r|| j        d         z  }||k    r||}}| j        }||         }||dz            }||         }||dz            }||z
  }	||z
  }
|	|
k    r:| j        |dz   |xx         |
|	z
  z  cc<   ||
z   | j        |dz   <   ||	z
  | j        |<   t          j        | j        d|         | j        ||         | j        ||         | j        ||         | j        |d         g          | _        t          j        | j        d|         | j        ||         | j        ||         | j        ||         | j        |d         g          | _        dS )aK  Swap two rows of a CSR matrix in-place.

    Parameters
    ----------
    X : sparse matrix of shape (n_samples, n_features)
        Matrix whose two rows are to be swapped. It should be of
        CSR format.

    m : int
        Index of the row of X to be swapped.

    n : int
        Index of the row of X to be swapped.
    rD   r   r   r   N)	rE   r(   rF   r   r    r+   concatenater#   r!   )r   rG   rH   rI   r+   m_startm_stopn_startn_stopnz_mnz_ns              r   inplace_swap_row_csrrT     s    V @ @a$$ 	@>???	@ 	1uu	QWQZ1uu	QWQZ 	1uu!1XFQiGAE]FQiGAE]FGDGDt||	Qtd{*!D.QtmIhwhIgfn%IfWn%Igfn%Ifgg	
 AI ^F8G8F76>"F6'>"F76>"F677O	
 AFFFr   c                     t          j        |           r| j        dk    rt          | ||           dS t          j        |           r| j        dk    rt	          | ||           dS t          |            dS )a  
    Swap two rows of a CSC/CSR matrix in-place.

    Parameters
    ----------
    X : sparse matrix of shape (n_samples, n_features)
        Matrix whose two rows are to be swapped. It should be of CSR or
        CSC format.

    m : int
        Index of the row of X to be swapped.

    n : int
        Index of the row of X to be swapped.

    Examples
    --------
    >>> from sklearn.utils import sparsefuncs
    >>> from scipy import sparse
    >>> import numpy as np
    >>> indptr = np.array([0, 2, 3, 3, 3])
    >>> indices = np.array([0, 2, 2])
    >>> data = np.array([8, 2, 5])
    >>> csr = sparse.csr_matrix((data, indices, indptr))
    >>> csr.todense()
    matrix([[8, 0, 2],
            [0, 0, 5],
            [0, 0, 0],
            [0, 0, 0]])
    >>> sparsefuncs.inplace_swap_row(csr, 0, 1)
    >>> csr.todense()
    matrix([[0, 0, 5],
            [8, 0, 2],
            [0, 0, 0],
            [0, 0, 0]])
    r1   r.   N)r   r   r   rK   rT   r   r   rG   rH   s      r   inplace_swap_rowrW     s    J 
{1~~ !(e++Q1%%%%%	Q AH--Q1%%%%%r   c                 D   |dk     r|| j         d         z  }|dk     r|| j         d         z  }t          j        |           r| j        dk    rt	          | ||           dS t          j        |           r| j        dk    rt          | ||           dS t          |            dS )a  
    Swap two columns of a CSC/CSR matrix in-place.

    Parameters
    ----------
    X : sparse matrix of shape (n_samples, n_features)
        Matrix whose two columns are to be swapped. It should be of
        CSR or CSC format.

    m : int
        Index of the column of X to be swapped.

    n : int
        Index of the column of X to be swapped.

    Examples
    --------
    >>> from sklearn.utils import sparsefuncs
    >>> from scipy import sparse
    >>> import numpy as np
    >>> indptr = np.array([0, 2, 3, 3, 3])
    >>> indices = np.array([0, 2, 2])
    >>> data = np.array([8, 2, 5])
    >>> csr = sparse.csr_matrix((data, indices, indptr))
    >>> csr.todense()
    matrix([[8, 0, 2],
            [0, 0, 5],
            [0, 0, 0],
            [0, 0, 0]])
    >>> sparsefuncs.inplace_swap_column(csr, 0, 1)
    >>> csr.todense()
    matrix([[0, 8, 2],
            [0, 0, 5],
            [0, 0, 0],
            [0, 0, 0]])
    r   r   r1   r.   N)r    r   r   r   rT   rK   r   rV   s      r   inplace_swap_columnrY     s    J 	1uu	QWQZ1uu	QWQZ	{1~~ !(e++Q1%%%%%	Q AH--Q1%%%%%r   c                     t          j        |           r-| j        dv r$|rt          | |          S t	          | |          S t          |            dS )a  Compute minimum and maximum along an axis on a CSR or CSC matrix.

     Optionally ignore NaN values.

    Parameters
    ----------
    X : sparse matrix of shape (n_samples, n_features)
        Input data. It should be of CSR or CSC format.

    axis : {0, 1}
        Axis along which the axis should be computed.

    ignore_nan : bool, default=False
        Ignore or passing through NaN values.

        .. versionadded:: 0.20

    Returns
    -------

    mins : ndarray of shape (n_features,), dtype={np.float32, np.float64}
        Feature-wise minima.

    maxs : ndarray of shape (n_features,), dtype={np.float32, np.float64}
        Feature-wise maxima.
    )r.   r1   r   N)r   r   r   r   r   r   )r   r   
ignore_nans      r   min_max_axisr\   4  sb    6 
{1~~ !(n44 	1&qt4444"140000r   c                    |dk    rd}n;|dk    rd}n2| j         dk    r't          d                     | j                             |5|| j        S t          j        t          j        | j                  |          S |dk    r5t          j        | j                  }||                    d          S ||z  S |dk    r{|&t          j        | j	        | j
        d         	          S t          j        |t          j        | j                            }t          j        | j	        | j
        d         |
          S t          d                     |                    )a  A variant of X.getnnz() with extension to weighting on axis 0.

    Useful in efficiently calculating multilabel metrics.

    Parameters
    ----------
    X : sparse matrix of shape (n_samples, n_labels)
        Input data. It should be of CSR format.

    axis : {0, 1}, default=None
        The axis on which the data is aggregated.

    sample_weight : array-like of shape (n_samples,), default=None
        Weight for each row of X.

    Returns
    -------
    nnz : int, float, ndarray of shape (n_samples,) or ndarray of shape (n_features,)
        Number of non-zero values in the array along a given axis. Otherwise,
        the total number of non-zero values in the array is returned.
    r   r   r.   z#Expected CSR sparse format, got {0}Nintp)	minlength)ra   r/   zUnsupported axis: {0})r   r   nnzr(   dotr*   r+   astypebincountr#   r    r)   r   )r   r   sample_weightoutr/   s        r   count_nonzerorh   X  sD   , rzz		
U		=DDQXNNOOO | 5L6"'!(++];;;	gah ::f%%%]""	 ;qyAGAJ????irwqx/@/@AAG;qyAGAJPPPP077==>>>r   c                 >   t          |           |z   }|st          j        S t          j        | dk               }t	          |d          \  }}|                                  |rt          || ||          S t          |dz
  | ||          t          || ||          z   dz  S )zCompute the median of data with n_zeros additional zeros.

    This function is used to support sparse matrices; it modifies data
    in-place.
    r   r   r   g       @)lenr(   nanrh   divmodsort_get_elem_at_rank)r!   n_zerosn_elems
n_negativemiddleis_odds         r   _get_medianrt     s     $ii'!G v!$(++JGQ''NFFIIKKK D z7CCC 	&1*dJ@@
FD*g
>
>	? r   c                 J    | |k     r||          S | |z
  |k     rdS || |z
           S )z@Find the value in data augmented with n_zeros for the given rankr    )rankr!   rq   ro   s       r   rn   rn     s;    jDzj7""qwr   c                    t          j        |           r| j        dk    st          d| j        z            | j        }| j        \  }}t          j        |          }t          t          |dd         |dd                             D ]F\  }\  }}t          j
        | j        ||                   }||j        z
  }	t          ||	          ||<   G|S )aC  Find the median across axis 0 of a CSC matrix.

    It is equivalent to doing np.median(X, axis=0).

    Parameters
    ----------
    X : sparse matrix of shape (n_samples, n_features)
        Input data. It should be of CSC format.

    Returns
    -------
    median : ndarray of shape (n_features,)
        Median.
    r1   z%Expected matrix of CSC format, got %sNr^   r   )r   r   r   r   r+   r    r(   zeros	enumeratezipcopyr!   r;   rt   )
r   r+   	n_samples
n_featuresmedianf_indstartendr!   nzs
             r   csc_median_axis_0r     s     KNN Lqx500?!(JKKKXFGIzXj!!F(VCRC[&*)E)EFF . .|swqveCi())"#D"--uMr   c                      dddf          j         t           fd fdfdfd j         j                  S )aA  Create an implicitly offset linear operator.

    This is used by PCA on sparse data to avoid densifying the whole data
    matrix.

    Params
    ------
        X : sparse matrix of shape (n_samples, n_features)
        offset : ndarray of shape (n_features,)

    Returns
    -------
    centered : LinearOperator
    Nc                     | z  | z  z
  S Nrv   xr   offsets    r   <lambda>z)_implicit_column_offset.<locals>.<lambda>      Q!+ r   c                     | z  | z  z
  S r   rv   r   s    r   r   z)_implicit_column_offset.<locals>.<lambda>  r   r   c                 >    | z  |                                  z  z
  S r   )sumr   XTr   s    r   r   z)_implicit_column_offset.<locals>.<lambda>  s    "q&FQUUWW$45 r   c                 `    | z  j         |                     d          d d d f         z  z
  S )Nr   r   )r4   r   r   s    r   r   z)_implicit_column_offset.<locals>.<lambda>  s0    "q&68aeeemmD!!!G.D#DD r   )matvecmatmatrmatvecrmatmatr7   r    )r4   r   r7   r    )r   r   r   s   ``@r   _implicit_column_offsetr     s|     D!!!G_F	
B++++++++++55555DDDDDgg   r   )NF)F)NN)&__doc__numpyr(   scipy.sparsesparser   scipy.sparse.linalgr   utils.fixesr   r   utils.validationr   sparsefuncs_fastr	   r3   r
   r2   r   r=   r   r   r&   r,   r5   r>   r@   rB   rK   rT   rW   rY   r\   rh   rt   rn   r   r   rv   r   r   <module>r      s   H H
           . . . . . . > > > > > > > > 3 3 3 3 3 3               
  
 
 
&1 &1 &1R2 2 2$K K K K\ NR p p p p pf* * *Z) ) )X  :< < <~* * *Z. . .b! ! ! !H3? 3? 3? 3?l  ,       >    r   