
    0Ph:V              	       j   d dl Z d dlZd dlZd dlmZ d dlmZmZ d dlZ	d dl
mZ ddlmZmZ ddlmZmZ ddlmZ dd	lmZmZmZmZmZmZmZ d
 Zd Zd Zd Zd!dZ d ddZ!ddddZ"d Z#d Z$d Z% edg ee j&        ddd          dgdgg ddd          dddddd            Z'dddd Z(dS )"    N)UserList)compressislice)issparse   )_is_numpy_namespaceget_namespace)Intervalvalidate_params)_approximate_mode)_is_arraylike_not_scalar_is_pandas_df_is_polars_df_or_series_use_interchange_protocolcheck_arraycheck_consistent_lengthcheck_random_statec                 ,   t          |           \  }}|r|                    | ||          S t          |           r|dk    rt          j        |          }t          |t                    rt          |          }|dk    r
| |df         n| dd|f         S )zAIndex an array or scipy.sparse consistently across NumPy version.axisboolr   .N)r	   taker   npasarray
isinstancetuplelist)arraykey	key_dtyper   xpis_array_apis         W/var/www/html/test/jupyter/venv/lib/python3.11/site-packages/sklearn/utils/_indexing.py_array_indexingr$      s    $U++B .wwucw--- 9..joo#u 3ii"aii5c??U111c6]:    c                 (   t          |          rt          j        |          }|dk    r@t          |t                    s+t          j        |          s|                     ||          S |dk    r| j        n| j        }|r|dd|f         n||         S )z%Index a pandas dataframe or a series.intr   N)	r   r   r   r   sliceisscalarr   ilocloc)Xr   r    r   indexers        r#   _pandas_indexingr.   '   s    $$ jooE:c5#9#9R[=M=M vvcv%%% &..!&&AE"&8wqqq#vGCL8r%   c                      t          j        |          st          |t                    r |         S |dk    rt	          t           |                    S  fd|D             S )zIndex a Python list.r   c                      g | ]
}|         S  r1   ).0idxr,   s     r#   
<listcomp>z"_list_indexing.<locals>.<listcomp>?   s    """sAcF"""r%   )r   r)   r   r(   r   r   )r,   r   r    s   `  r#   _list_indexingr5   6   sl    	{3 :c511 vFHQ$$%%%""""c""""r%   c                 
   t          |t          j                  r|                                }n8t          j        |          s$t          |t
                    st          |          }|dk    r| dd|f         S |dk    r|                     |          S | |         }t          j        |          rRt          | j	                  dk    r:t          j        d         }|                    |                    d                    S |S )z,Indexing X with polars interchange protocol.r   Nr      polarsr   )r   r   ndarraytolistr)   r(   r   filterlenshapesysmodulesSeriesrow)r,   r   r    r   	X_indexedpls         r#   _polars_indexingrD   B   s     #rz"" jjllk# *S%"8"8 3iiqyy CyFxx}} #I	{3 +CLLA-- ["yyq))***r%   Tc                    d}t           dt          dt          dt          j        di}ddddddd}| dS t          | t          |                                                    r3	 |t          |                    S # t          $ r t          |          w xY wt          | t                    rh|st          d          | j        	| j        dS t          | j                  }t          | j                  }||||k    rt          |          ||S |S t          | t           t          t"          f          rUt%          |           }d |D             }|sdS t'          |          d	k    rt          |          |                                S t+          | d
          rt-          |           \  }	}
|
rXt/          |	          sI|	                    | j        d          rdS |	                    | j        d          rdS t          |          	 || j        j                 S # t          $ r t          |          w xY wt          |          )as  Determine the data type of key.

    Parameters
    ----------
    key : scalar, slice or array-like
        The key from which we want to infer the data type.

    accept_slice : bool, default=True
        Whether or not to raise an error if the key is a slice.

    Returns
    -------
    dtype : {'int', 'str', 'bool', None}
        Returns the data type of key.
    z~No valid specification of the columns. Only a scalar, list or slice of all integers or all strings, or boolean mask is allowedr'   strr   )iubOUSNzBOnly array-like or scalar are supported. A Python slice was given.c                 ,    h | ]}t          |          S r1   )_determine_key_type)r2   elts     r#   	<setcomp>z&_determine_key_type.<locals>.<setcomp>   s!    CCC',,CCCr%   r   dtypeintegral)r'   rF   r   r   bool_r   r   keystypeKeyError
ValueErrorr(   	TypeErrorstartstoprN   r   r   setr<   pophasattrr	   r   isdtyperQ   kind)r   accept_sliceerr_msgdtype_to_strarray_dtype_to_strkey_start_typekey_stop_type
unique_keykey_typer!   r"   s              r#   rN   rN   _   s   "	  UD&"(FKL  {t#u\..001122 &	&S		** 	& 	& 	&W%%%	&#u  	T   9!14,SY77+CH55%-*C.. )))%!! #eX.// XX
CC
CCC 	4x==AW%%%||~~sG *(--L  	* 3B 7 7 	*zz#)V,, *vCIz22 *u )))*)#).99 * * * )))*
W

s   #A8 8BH H2r   c                B   || S |dvr"t          d                    |                    t          |          }|dk    r|dk    rt          d          |dk    r$t          | t                    rt          d          |dk    rjt          | d	          rZt          | j                  d
k    rBt          d                    t          |           t          | j                                      |dk    r3|dk    r-t          |           st          |           st          d          t          | d          rt          | |||          S t          |           rt          | |||          S t          | d	          rt          | |||          S t          | ||          S )aX  Return rows, items or columns of X using indices.

    .. warning::

        This utility is documented, but **private**. This means that
        backward compatibility might be broken without any deprecation
        cycle.

    Parameters
    ----------
    X : array-like, sparse-matrix, list, pandas.DataFrame, pandas.Series
        Data from which to sample rows, items or columns. `list` are only
        supported when `axis=0`.
    indices : bool, int, str, slice, array-like
        - If `axis=0`, boolean and integer array-like, integer slice,
          and scalar integer are supported.
        - If `axis=1`:
            - to select a single column, `indices` can be of `int` type for
              all `X` types and `str` only for dataframe. The selected subset
              will be 1D, unless `X` is a sparse matrix in which case it will
              be 2D.
            - to select multiples columns, `indices` can be one of the
              following: `list`, `array`, `slice`. The type used in
              these containers can be one of the following: `int`, 'bool' and
              `str`. However, `str` is only supported when `X` is a dataframe.
              The selected subset will be 2D.
    axis : int, default=0
        The axis along which `X` will be subsampled. `axis=0` will select
        rows while `axis=1` will select columns.

    Returns
    -------
    subset
        Subset of X on axis 0 or 1.

    Notes
    -----
    CSR, CSC, and LIL sparse matrices are supported. COO sparse matrices are
    not supported.

    Examples
    --------
    >>> import numpy as np
    >>> from sklearn.utils import _safe_indexing
    >>> data = np.array([[1, 2], [3, 4], [5, 6]])
    >>> _safe_indexing(data, 0, axis=0)  # select the first row
    array([1, 2])
    >>> _safe_indexing(data, 0, axis=1)  # select the first column
    array([1, 3, 5])
    N)r   r   zR'axis' should be either 0 (to index rows) or 1 (to index  column). Got {} instead.r   rF   z.String indexing is not supported with 'axis=0'r   z!axis=1 is not supported for listsr=   r7   z'X' should be a 2D NumPy array, 2D sparse matrix or dataframe when indexing the columns (i.e. 'axis=1'). Got {} instead with {} dimension(s).FSpecifying the columns using strings is only supported for dataframes.r*   r   )rW   formatrN   r   r   r]   r<   r=   rU   r   r   r.   r   rD   r$   r5   )r,   indicesr   indices_dtypes       r#   _safe_indexingrm      s   f 6((.t
 
 	

 (00Mqyy]e++IJJJqyyZ4((y<===qyyWQ((yS\\Q->->3396$q''3qw<<3P3P
 
 	
 			U""q!! #%>q%A%A # T
 
 	
 q& 	9  7MEEEE	 	#	# 97MEEEE	G		 9q'=tDDDDa-888r%   )row_indexercolumn_indexerc                0   |t          ddd          n|}|t          ddd          n|}t          | d          rTt          j                    5  t          j        dt
                     || j        ||f<   ddd           dS # 1 swxY w Y   dS || ||f<   dS )an  Safe assignment to a numpy array, sparse matrix, or pandas dataframe.

    Parameters
    ----------
    X : {ndarray, sparse-matrix, dataframe}
        Array to be modified. It is expected to be 2-dimensional.

    values : ndarray
        The values to be assigned to `X`.

    row_indexer : array-like, dtype={int, bool}, default=None
        A 1-dimensional array to select the rows of interest. If `None`, all
        rows are selected.

    column_indexer : array-like, dtype={int, bool}, default=None
        A 1-dimensional array to select the columns of interest. If `None`, all
        columns are selected.
    Nr*   ignore)r(   r]   warningscatch_warningssimplefilterFutureWarningr*   )r,   valuesrn   ro   s       r#   _safe_assignrw     s   & .9-@%dD)))kK#1#9dD$~  q& 
0$&& 	9 	9 !(M:::28AF;./	9 	9 	9 	9 	9 	9 	9 	9 	9 	9 	9 	9 	9 	9 	9 	9 	9 	9 *0+~
%&&&s   'BB	Bc                     	 t          t          j        |          |           }n,# t          $ r}t	          d|dz
   d| d          |d }~ww xY wt          j        |                                          S )Nzall features must be in [0, r   z] or [-z, 0])rm   r   arange
IndexErrorrW   
atleast_1dr:   )r   	n_columnsr3   es       r#   #_get_column_indices_for_bool_or_intr~   8  s    RYy11377   P9q=PPPPP
 
	 =$$&&&s   "% 
AA		Ac                    t          |          }t          |           r#t          |                                 ||          S | j        d         }t          |t          t          f          r|sg S |dv rt          ||          S 	 | j	        }n# t          $ r t          d          w xY wt          |t                    r|g}nt          |t                    rp|j        |j        }}||                    |          }||                    |          dz   }n|dz   }t          t#          t%          |          ||                    S t          |          }	 g }|D ]Y}	|                    |	          }
t          |
t&          j                  st          d| d          |                    |
           Zn"# t,          $ r}t          d          |d}~ww xY w|S )zGet feature column indices for input data X and key.

    For accepted values of `key`, see the docstring of
    :func:`_safe_indexing`.
    r   r   r'   ri   NzSelected columns, z, are not unique in dataframe/A given column is not a column of the dataframe)rN   r   _get_column_indices_interchange__dataframe__r=   r   r   r   r~   columnsAttributeErrorrW   rF   r(   rY   rZ   get_locr   rangenumbersIntegralappendrV   )r,   r   r    r|   all_columnsr   rY   rZ   column_indicescolcol_idxr}   s               r#   _get_column_indicesr   C  s.    $C((I ## R.q/@/@#yQQQ
I#e}%% (c (		o	%	%23	BBB	)KK 	 	 	X  	 c3 	 eGGU## 	 )SX4E #++E22"**40014 1}uY//==>>>3iiG	WN / /%--c22!'7+;<< $SWSSS   %%g..../  	W 	W 	WNOOUVV	W s%   B B&AF6 6
G GGc                    |                                  }t          |t          t          f          r|sg S |dv rt	          ||          S t          |                                           t          |t                    r|j        dvrt          d          |j	        |j
        }}|                    |          }|                    |          dz   }n|dz   }t          t          t          |          ||                    S t          j        |          r|gn|}	 fd|D             S # t           $ r}t!          d          |d}~ww xY w)zBSame as _get_column_indices but for X with __dataframe__ protocol.r   )r   Nzkey.step must be 1 or NoneNr   c                 :    g | ]}                     |          S r1   )index)r2   r   column_namess     r#   r4   z3_get_column_indices_interchange.<locals>.<listcomp>  s'    HHHL&&s++HHHr%   r   )num_columnsr   r   r   r~   r   r(   stepNotImplementedErrorrY   rZ   r   r   r   r   r)   rW   )	X_interchanger   r    r|   rY   rZ   selected_columnsr}   r   s	           @r#   r   r   y  s~    ))++I#e}%% Wc W		o	%	%23	BBBM668899c5!! 	?xy(()*FGGG)SX4E $**511#))$//!3 1}uY//==>>>$&K$4$4=C55#	WHHHH7GHHHH 	W 	W 	WNOOUVV	Ws   "D0 0
E:E

Ebooleanleft)closedrandom_state)z
array-likezsparse matrixN)replace	n_samplesr   stratify)prefer_skip_nested_validationc                 x   |}t          |          }t          |          dk    rdS |d         }t          |d          r|j        d         nt          |          }||}n||k    r| st	          d||fz            t          |  |R| r|                    d||f          n`t          j        |          |	                               d|         n+t          |dd          }|j        dk    rt          j        d	 |D                       }t          j        |d
          \  }}	|j        d         }
t          j        |	          }t          j        t          j        |	d          t          j        |          dd                   }t%          |||          }g t'          |
          D ];}|                    ||         ||         |           }                    |           <|                              d |D             }fd|D             }t          |          dk    r|d         S |S )aW
  Resample arrays or sparse matrices in a consistent way.

    The default strategy implements one step of the bootstrapping
    procedure.

    Parameters
    ----------
    *arrays : sequence of array-like of shape (n_samples,) or             (n_samples, n_outputs)
        Indexable data-structures can be arrays, lists, dataframes or scipy
        sparse matrices with consistent first dimension.

    replace : bool, default=True
        Implements resampling with replacement. If False, this will implement
        (sliced) random permutations.

    n_samples : int, default=None
        Number of samples to generate. If left to None this is
        automatically set to the first dimension of the arrays.
        If replace is False it should not be larger than the length of
        arrays.

    random_state : int, RandomState instance or None, default=None
        Determines random number generation for shuffling
        the data.
        Pass an int for reproducible results across multiple function calls.
        See :term:`Glossary <random_state>`.

    stratify : {array-like, sparse matrix} of shape (n_samples,) or             (n_samples, n_outputs), default=None
        If not None, data is split in a stratified fashion, using this as
        the class labels.

    Returns
    -------
    resampled_arrays : sequence of array-like of shape (n_samples,) or             (n_samples, n_outputs)
        Sequence of resampled copies of the collections. The original arrays
        are not impacted.

    See Also
    --------
    shuffle : Shuffle arrays or sparse matrices in a consistent way.

    Examples
    --------
    It is possible to mix sparse and dense arrays in the same run::

      >>> import numpy as np
      >>> X = np.array([[1., 0.], [2., 1.], [0., 0.]])
      >>> y = np.array([0, 1, 2])

      >>> from scipy.sparse import coo_matrix
      >>> X_sparse = coo_matrix(X)

      >>> from sklearn.utils import resample
      >>> X, X_sparse, y = resample(X, X_sparse, y, random_state=0)
      >>> X
      array([[1., 0.],
             [2., 1.],
             [1., 0.]])

      >>> X_sparse
      <Compressed Sparse Row sparse matrix of dtype 'float64'
          with 4 stored elements and shape (3, 2)>

      >>> X_sparse.toarray()
      array([[1., 0.],
             [2., 1.],
             [1., 0.]])

      >>> y
      array([0, 1, 0])

      >>> resample(y, n_samples=2, random_state=0)
      array([0, 1])

    Example using stratification::

      >>> y = [0, 0, 1, 1, 1, 1, 1, 1, 1]
      >>> resample(y, n_samples=5, replace=False, stratify=y,
      ...          random_state=0)
      [1, 1, 1, 0, 1]
    r   Nr=   z@Cannot sample %d out of arrays with dim %d when replace is False)sizeF)	ensure_2drQ   r7   c                 ^    g | ]*}d                      |                    d                    +S ) rF   )joinastype)r2   rA   s     r#   r4   zresample.<locals>.<listcomp>  s0    CCC##((3::e#4#455CCCr%   T)return_inverse	mergesort)r_   )r   c                 X    g | ]'}t          |          r|                                n|(S r1   )r   tocsr)r2   as     r#   r4   zresample.<locals>.<listcomp>1  s/    >>>!8A;;-aggiiiA>>>r%   c                 0    g | ]}t          |          S r1   )rm   )r2   r   rk   s     r#   r4   zresample.<locals>.<listcomp>2  s#    CCCqq'22CCCr%   r   )r   r<   r]   r=   rW   r   randintr   ry   shuffler   ndimr   uniquebincountsplitargsortcumsumr   r   choiceextendpermutation)r   r   r   r   arraysmax_n_samplesfirstyclasses	y_indices	n_classesclass_countsclass_indicesn_irG   	indices_iresampled_arraysrk   s                    @r#   resampler     s   | M%l33L
6{{at1IE")%"9"9IAs5zzI!
)
#
#g
#Ni()
 
 	

 V$$ 	."**1i}>N*OOGGi	**G  )))n}n-GG E>>>6Q;; CCCCCDDAYq>>>M!$	{9-- Jy{333RY|5L5LSbS5Q
 
  m\JJy!! 	& 	&A$++M!,<c!fg+VVINN9%%%%**733 ?>v>>>FCCCCFCCC
!!""r%   )r   r   c                     t          |d|| dS )a  Shuffle arrays or sparse matrices in a consistent way.

    This is a convenience alias to ``resample(*arrays, replace=False)`` to do
    random permutations of the collections.

    Parameters
    ----------
    *arrays : sequence of indexable data-structures
        Indexable data-structures can be arrays, lists, dataframes or scipy
        sparse matrices with consistent first dimension.

    random_state : int, RandomState instance or None, default=None
        Determines random number generation for shuffling
        the data.
        Pass an int for reproducible results across multiple function calls.
        See :term:`Glossary <random_state>`.

    n_samples : int, default=None
        Number of samples to generate. If left to None this is
        automatically set to the first dimension of the arrays.  It should
        not be larger than the length of arrays.

    Returns
    -------
    shuffled_arrays : sequence of indexable data-structures
        Sequence of shuffled copies of the collections. The original arrays
        are not impacted.

    See Also
    --------
    resample : Resample arrays or sparse matrices in a consistent way.

    Examples
    --------
    It is possible to mix sparse and dense arrays in the same run::

      >>> import numpy as np
      >>> X = np.array([[1., 0.], [2., 1.], [0., 0.]])
      >>> y = np.array([0, 1, 2])

      >>> from scipy.sparse import coo_matrix
      >>> X_sparse = coo_matrix(X)

      >>> from sklearn.utils import shuffle
      >>> X, X_sparse, y = shuffle(X, X_sparse, y, random_state=0)
      >>> X
      array([[0., 0.],
             [2., 1.],
             [1., 0.]])

      >>> X_sparse
      <Compressed Sparse Row sparse matrix of dtype 'float64'
          with 3 stored elements and shape (3, 2)>

      >>> X_sparse.toarray()
      array([[0., 0.],
             [2., 1.],
             [1., 0.]])

      >>> y
      array([2, 1, 0])

      >>> shuffle(y, n_samples=2, random_state=0)
      array([0, 1])
    F)r   r   r   )r   )r   r   r   s      r#   r   r   :  s%    D 	),   r%   )T))r   r>   rr   collectionsr   	itertoolsr   r   numpyr   scipy.sparser   
_array_apir   r	   _param_validationr
   r   extmathr   
validationr   r   r   r   r   r   r   r$   r.   r5   rD   rN   rm   rw   r~   r   r   r   r   r   r1   r%   r#   <module>r      s    



              & & & & & & & &     ! ! ! ! ! ! : : : : : : : : 8 8 8 8 8 8 8 8 & & & & & &                 	; 	; 	;9 9 9	# 	# 	#  :Q Q Q Qh () ]9 ]9 ]9 ]9 ]9@ ,0 "0 "0 "0 "0 "0J' ' '3 3 3lW W WD ;hw/DHHH$O'(999	  #'   #dPT S  S  S  S  S l #'$ D D D D D D Dr%   