
    0Ph2                         d Z ddlZddlmZ ddlZddlmZ ddl	m
Z
mZmZ ddlmZ ddlmZmZ dd	lmZ dd
lmZ ddlmZ ddlmZmZ ddlmZ ddlmZ ddlm Z m!Z!  G d deee
          Z"dS )z!
Nearest Centroid Classification
    N)Real)sparse   )BaseEstimatorClassifierMixin_fit_context)#DiscriminantAnalysisPredictionMixin)pairwise_distancespairwise_distances_argmin)LabelEncoder)get_tags)available_if)Interval
StrOptions)check_classification_targets)csc_median_axis_0)check_is_fittedvalidate_datac                   r    e Zd ZU dZ eddh          g eeddd          dgd ed	d
h          gdZee	d<   	 ddd
ddZ
 ed          d             Z fdZd Zd Z  ee          ej                  Z  ee          ej                  Z  ee          ej                  Z fdZ xZS )NearestCentroida  Nearest centroid classifier.

    Each class is represented by its centroid, with test samples classified to
    the class with the nearest centroid.

    Read more in the :ref:`User Guide <nearest_centroid_classifier>`.

    Parameters
    ----------
    metric : {"euclidean", "manhattan"}, default="euclidean"
        Metric to use for distance computation.

        If `metric="euclidean"`, the centroid for the samples corresponding to each
        class is the arithmetic mean, which minimizes the sum of squared L1 distances.
        If `metric="manhattan"`, the centroid is the feature-wise median, which
        minimizes the sum of L1 distances.

        .. versionchanged:: 1.5
            All metrics but `"euclidean"` and `"manhattan"` were deprecated and
            now raise an error.

        .. versionchanged:: 0.19
            `metric='precomputed'` was deprecated and now raises an error

    shrink_threshold : float, default=None
        Threshold for shrinking centroids to remove features.

    priors : {"uniform", "empirical"} or array-like of shape (n_classes,),         default="uniform"
        The class prior probabilities. By default, the class proportions are
        inferred from the training data.

        .. versionadded:: 1.6

    Attributes
    ----------
    centroids_ : array-like of shape (n_classes, n_features)
        Centroid of each class.

    classes_ : array of shape (n_classes,)
        The unique classes labels.

    n_features_in_ : int
        Number of features seen during :term:`fit`.

        .. versionadded:: 0.24

    feature_names_in_ : ndarray of shape (`n_features_in_`,)
        Names of features seen during :term:`fit`. Defined only when `X`
        has feature names that are all strings.

        .. versionadded:: 1.0

    deviations_ : ndarray of shape (n_classes, n_features)
        Deviations (or shrinkages) of the centroids of each class from the
        overall centroid. Equal to eq. (18.4) if `shrink_threshold=None`,
        else (18.5) p. 653 of [2]. Can be used to identify features used
        for classification.

        .. versionadded:: 1.6

    within_class_std_dev_ : ndarray of shape (n_features,)
        Pooled or within-class standard deviation of input data.

        .. versionadded:: 1.6

    class_prior_ : ndarray of shape (n_classes,)
        The class prior probabilities.

        .. versionadded:: 1.6

    See Also
    --------
    KNeighborsClassifier : Nearest neighbors classifier.

    Notes
    -----
    When used for text classification with tf-idf vectors, this classifier is
    also known as the Rocchio classifier.

    References
    ----------
    [1] Tibshirani, R., Hastie, T., Narasimhan, B., & Chu, G. (2002). Diagnosis of
    multiple cancer types by shrunken centroids of gene expression. Proceedings
    of the National Academy of Sciences of the United States of America,
    99(10), 6567-6572. The National Academy of Sciences.

    [2] Hastie, T., Tibshirani, R., Friedman, J. (2009). The Elements of Statistical
    Learning Data Mining, Inference, and Prediction. 2nd Edition. New York, Springer.

    Examples
    --------
    >>> from sklearn.neighbors import NearestCentroid
    >>> import numpy as np
    >>> X = np.array([[-1, -1], [-2, -1], [-3, -2], [1, 1], [2, 1], [3, 2]])
    >>> y = np.array([1, 1, 1, 2, 2, 2])
    >>> clf = NearestCentroid()
    >>> clf.fit(X, y)
    NearestCentroid()
    >>> print(clf.predict([[-0.8, -1]]))
    [1]
    	manhattan	euclideanr   Nneither)closedz
array-like	empiricaluniformmetricshrink_thresholdpriors_parameter_constraints)r   r    c                0    || _         || _        || _        d S )Nr   )selfr   r   r    s       c/var/www/html/test/jupyter/venv/lib/python3.11/site-packages/sklearn/neighbors/_nearest_centroid.py__init__zNearestCentroid.__init__   s      0    T)prefer_skip_nested_validationc                 "   | j         dk    rt          | ||dg          \  }}n6t          |           j        j        rdnd}t          | |||ddg          \  }}t          j        |          }t          |           |j        \  }}t                      }|
                    |          }|j        x| _        }	|	j        }
|
dk     rt          d	|
z            | j        d
k    rPt          j        |d          \  }}t          j        |          t%          t'          |                    z  | _        nJ| j        dk    r!t          j        d|
z  g|
z            | _        nt          j        | j                  | _        | j        dk                                     rt          d          t          j        | j                                        d          s@t3          j        dt6                     | j        | j                                        z  | _        t          j        |
|ft          j                  | _        t          j        |
          }tA          |
          D ]}||k    }t          j        |          ||<   |rt          j!        |          d         }| j         dk    rE|s%t          j"        ||         d          | j        |<   mtG          ||                   | j        |<   ||         $                    d          | j        |<   t          j%        || j        |         z
  d          dz  }t          j%        t          j&        |                    d          ||
z
  z            d          | _'        t-          | j'        dk              rt3          j        d           d}|rdt          j(        |)                    d          |*                    d          z
  +                                dk              rt          |          |s;t          j(        t          j,        |d          dk              rt          |          |$                    d          }t          j&        d|z  d|z  z
            }| j'        t          j"        | j'                  z   }|-                    t'          |          d          }||z  }t          j%        | j        |z
  |z  d          | _.        | j/        rt          j0        | j.                  }t          j1        | j.                  | j/        z
  | _.        t          j2        | j.        dd| j.                   | xj.        |z  c_.        || j.        z  }t          j%        ||z   d          | _        | S )a0  
        Fit the NearestCentroid model according to the given training data.

        Parameters
        ----------
        X : {array-like, sparse matrix} of shape (n_samples, n_features)
            Training vector, where `n_samples` is the number of samples and
            `n_features` is the number of features.
            Note that centroid shrinking cannot be used with sparse matrices.
        y : array-like of shape (n_samples,)
            Target values.

        Returns
        -------
        self : object
            Fitted estimator.
        r   csc)accept_sparse	allow-nanTcsr)ensure_all_finiter*   r   z>The number of classes has to be greater than one; got %d classr   )return_inverser      r   zpriors must be non-negativeg      ?zAThe priors do not sum to 1. Normalizing such that it sums to one.dtype)axisF)copyzself.within_class_std_dev_ has at least 1 zero standard deviation.Inputs within the same classes for at least 1 feature are identical.z2All features have zero variance. Division by zero.N)out)3r   r   r   
input_tags	allow_nanspissparser   shaper   fit_transformclasses_size
ValueErrorr    npuniquebincountfloatlenclass_prior_asarrayanyisclosesumwarningswarnUserWarningemptyfloat64
centroids_zerosrangewheremedianr   meanarraysqrtwithin_class_std_dev_allmaxmintoarrayptpreshapedeviations_r   signabsclip)r#   Xyr-   is_X_sparse	n_samples
n_featuresley_indclasses	n_classes_class_countsnk	cur_classcenter_maskvarianceerr_msgdataset_centroid_msmmmssignsmsds                            r$   fitzNearestCentroid.fit   su   * ;+%% q!E7CCCDAqq  (~~8BL  !"3$en  DAq k!nn$Q''' !	:^^  ##"$+-L	q==P  
 ;+%% i$???OA| "L 9 9E#a&&MM ID[I%% "
A	M?Y+F G GD "
4; 7 7D!&&(( 	<:;;;z$+//11377 	LMS   !% 1D4E4I4I4K4K KD (Iz#:"*MMM Xi  y)) 	I 	II9,KF;//ByM 7 h{33A6{k))" S131[>PQ1R1R1RDOI..1B1[>1R1RDOI..-.{^-@-@a-@-H-H	** 8A 66UCCCqH%'XGHLLaL((I	,ABCC%&
 &
 &
" t)Q.// 	MW  
 G 	&26155a5==155a5==#@"I"I"K"Kq"PQQ 	&W%%% 	&qq(9(9(9Q(>!?!? 	&W%%%FFFNNGS2X#	/233 &43M)N)NNYYs1vvq!!!V8_00B6U
 
 

   	LGD,--E!vd&677$:OODGD$a43CDDDD%t''C h'83'>UKKKDOr&   c                    t          |            t          j        | j        dt	          | j                  z                                            rXt          |           j        j	        rdnd}t          | ||dd          }| j        t          || j        | j                           S t                                          |          S )a  Perform classification on an array of test vectors `X`.

        The predicted class `C` for each sample in `X` is returned.

        Parameters
        ----------
        X : {array-like, sparse matrix} of shape (n_samples, n_features)
            Input data.

        Returns
        -------
        y_pred : ndarray of shape (n_samples,)
            The predicted classes.
        r/   r+   Tr,   F)r-   r*   resetr   )r   r>   rF   rC   rB   r;   rV   r   r5   r6   r   r   rM   r   superpredict)r#   r`   r-   	__class__s      r$   r|   zNearestCentroid.predict  s     	:d'S-?-?)?@@DDFF 	&  (~~8BL  "3#  A =)!T_T[QQQ  77??1%%%r&   c           	         t          | d           t          | |dddt          j                  }t          j        |j        d         | j        j        ft          j                  }| j        dk    }|d d |fxx         | j        |         z  cc<   | j	        
                                }|d d |fxx         | j        |         z  cc<   t          | j        j                  D ]v}t          |||g         | j                                                  }|d	z  }t          j        | d
t          j        | j        |                   z  z             |d d |f<   w|S )NrM   TFr,   )r3   ry   r*   r1   r   r0   rz   r   g       @)r   r   r>   rL   rK   r9   r;   r<   rU   rM   r3   rO   r
   r   ravelsqueezelogrC   )r#   r`   X_normalizeddiscriminant_scoremaskcentroids_normalized	class_idx	distancess           r$   _decision_functionz"NearestCentroid._decision_function8  s   l+++$!$e5

 
 
  X"DM$67rz
 
 
 )Q.QQQW!;D!AA#3355QQQW%%%)CD)II%%%t}122 	 	I*2I;?  egg  !OI/1z
S26$*;I*F#G#GGG0 0qqq)|,, "!r&   c                     | j         dk    S )Nr   rz   )r#   s    r$   _check_euclidean_metricz'NearestCentroid._check_euclidean_metricT  s    {k))r&   c                     t                                                      }| j        dk    |j        _        d|j        _        |S )Nnan_euclideanT)r{   __sklearn_tags__r   r5   r6   r   )r#   tagsr}   s     r$   r   z NearestCentroid.__sklearn_tags__c  s8    ww''))$(K?$B!!%r&   )r   )__name__
__module____qualname____doc__r   r   r   r!   dict__annotations__r%   r   rw   r|   r   r   r   r	   decision_functionpredict_probapredict_log_probar   __classcell__)r}   s   @r$   r   r      s        e eP :{K899:%XdAtIFFFM[),D!E!EF$ $D    	 	 	 	 	 	 \555{ { 65{z &  &  &  &  &D" " "8* * * >%<==+=  :LL!899+9 M >%<==+=         r&   r   )#r   rH   numbersr   numpyr>   scipyr   r7   baser   r   r   discriminant_analysisr	   metrics.pairwiser
   r   preprocessingr   utilsr   utils._available_ifr   utils._param_validationr   r   utils.multiclassr   utils.sparsefuncsr   utils.validationr   r   r    r&   r$   <module>r      sq                     ? ? ? ? ? ? ? ? ? ? G G G G G G        ) ( ( ( ( (       . . . . . . : : : : : : : : ; ; ; ; ; ; 1 1 1 1 1 1 = = = = = = = =J J J J J'-J J J J Jr&   