
    ZPh"                         d Z ddlZddlZddlmZ ddlmZ ddlm	Z	 ddl
mZ ddlmZ ddlmZ d	d
lmZmZmZ  G d de          ZdS )z(Metrics to perform pairwise computation.    N)distance_matrix)BaseEstimator)check_consistent_length)
StrOptions)unique_labels)check_is_fitted   )_fit_contextcheck_arrayvalidate_datac                        e Zd ZU dZ edh          dgej        gej        gdZee	d<   dddddZ
 ed	
          d             ZddZd Z fdZ xZS )ValueDifferenceMetrica{  Class implementing the Value Difference Metric.

    This metric computes the distance between samples containing only
    categorical features. The distance between feature values of two samples is
    defined as:

    .. math::
       \delta(x, y) = \sum_{c=1}^{C} |p(c|x_{f}) - p(c|y_{f})|^{k} \ ,

    where :math:`x` and :math:`y` are two samples and :math:`f` a given
    feature, :math:`C` is the number of classes, :math:`p(c|x_{f})` is the
    conditional probability that the output class is :math:`c` given that
    the feature value :math:`f` has the value :math:`x` and :math:`k` an
    exponent usually defined to 1 or 2.

    The distance for the feature vectors :math:`X` and :math:`Y` is
    subsequently defined as:

    .. math::
       \Delta(X, Y) = \sum_{f=1}^{F} \delta(X_{f}, Y_{f})^{r} \ ,

    where :math:`F` is the number of feature and :math:`r` an exponent usually
    defined equal to 1 or 2.

    The definition of this distance was propoed in [1]_.

    Read more in the :ref:`User Guide <vdm>`.

    .. versionadded:: 0.8

    Parameters
    ----------
    n_categories : "auto" or array-like of shape (n_features,), default="auto"
        The number of unique categories per features. If `"auto"`, the number
        of categories will be computed from `X` at `fit`. Otherwise, you can
        provide an array-like of such counts to avoid computation. You can use
        the fitted attribute `categories_` of the
        :class:`~sklearn.preprocesssing.OrdinalEncoder` to deduce these counts.

    k : int, default=1
        Exponent used to compute the distance between feature value.

    r : int, default=2
        Exponent used to compute the distance between the feature vector.

    Attributes
    ----------
    n_categories_ : ndarray of shape (n_features,)
        The number of categories per features.

    proba_per_class_ : list of ndarray of shape (n_categories, n_classes)
        List of length `n_features` containing the conditional probabilities
        for each category given a class.

    n_features_in_ : int
        Number of features in the input dataset.

        .. versionadded:: 0.10

    feature_names_in_ : ndarray of shape (`n_features_in_`,)
        Names of features seen during `fit`. Defined only when `X` has feature
        names that are all strings.

        .. versionadded:: 0.10

    See Also
    --------
    sklearn.neighbors.DistanceMetric : Interface for fast metric computation.

    Notes
    -----
    The input data `X` are expected to be encoded by an
    :class:`~sklearn.preprocessing.OrdinalEncoder` and the data type is used
    should be `np.int32`. If other data types are given, `X` will be converted
    to `np.int32`.

    References
    ----------
    .. [1] Stanfill, Craig, and David Waltz. "Toward memory-based reasoning."
       Communications of the ACM 29.12 (1986): 1213-1228.

    Examples
    --------
    >>> import numpy as np
    >>> X = np.array(["green"] * 10 + ["red"] * 10 + ["blue"] * 10).reshape(-1, 1)
    >>> y = [1] * 8 + [0] * 5 + [1] * 7 + [0] * 9 + [1]
    >>> from sklearn.preprocessing import OrdinalEncoder
    >>> encoder = OrdinalEncoder(dtype=np.int32)
    >>> X_encoded = encoder.fit_transform(X)
    >>> from imblearn.metrics.pairwise import ValueDifferenceMetric
    >>> vdm = ValueDifferenceMetric().fit(X_encoded, y)
    >>> pairwise_distance = vdm.pairwise(X_encoded)
    >>> pairwise_distance.shape
    (30, 30)
    >>> X_test = np.array(["green", "red", "blue"]).reshape(-1, 1)
    >>> X_test_encoded = encoder.transform(X_test)
    >>> vdm.pairwise(X_test_encoded)
    array([[0.  ,  0.04,  1.96],
           [0.04,  0.  ,  1.44],
           [1.96,  1.44,  0.  ]])
    autoz
array-liken_categorieskr_parameter_constraints   r	   c                0    || _         || _        || _        d S Nr   )selfr   r   r   s       Y/var/www/html/test/jupyter/venv/lib/python3.11/site-packages/imblearn/metrics/pairwise.py__init__zValueDifferenceMetric.__init__   s    (    T)prefer_skip_nested_validationc           	         |                                   t          ||           t          | ||dt          j                  \  }}t          |d          }t          | j        t                    r*| j        dk    r|	                    d          dz   | _
        nht          | j                  | j        k    r-t          dt          | j                   d	| j         d
          t          j        | j                  | _
        t          |          fd| j
        D             | _        t#          | j                  D ]V}t%                    D ]D\  }}t          j        |||k    |f         | j
        |                   | j        |         dd|f<   EWt          j        d          5  t#          | j                  D ]k}| j        |xx         | j        |                             d                              dd          z  cc<   t          j        | j        |         d           l	 ddd           n# 1 swxY w Y   | S )a  Compute the necessary statistics from the training set.

        Parameters
        ----------
        X : ndarray of shape (n_samples, n_features), dtype=np.int32
            The input data. The data are expected to be encoded with a
            :class:`~sklearn.preprocessing.OrdinalEncoder`.

        y : ndarray of shape (n_features,)
            The target.

        Returns
        -------
        self : object
            Return the instance itself.
        T)Xyresetdtype)ensure_non_negativer   r   )axisr   zRThe length of n_categories is not consistent with the number of feature in X. Got z elements in n_categories and z in X.c                 n    g | ]1}t          j        |t                    ft           j                   2S )shaper!   )npemptylenfloat64).0n_catclassess     r   
<listcomp>z-ValueDifferenceMetric.fit.<locals>.<listcomp>   sE     !
 !
 !
 HE3w<<0
CCC!
 !
 !
r   )	minlengthNignore)invalidF)copy)_validate_paramsr   r   r'   int32r   
isinstancer   strmaxn_categories_r)   n_features_in_
ValueErrorasarrayr   proba_per_class_range	enumeratebincounterrstatesumreshape
nan_to_num)r   r   r   feature_idx	klass_idxklassr-   s         @r   fitzValueDifferenceMetric.fit   s   $ 	1%%%TQ!4rxHHH1t444d'-- 	?$2Cv2M2M!"A!2D4$%%)<<< 36t7H3I3I 484G     "$D,=!>!>D""!
 !
 !
 !
+!
 !
 !
 !!455 	 	K$-g$6$6   	5CE;a5j+-."0=D D D%k2111i<@@ [*** 	N 	N$T%899 N N%k222)+6:::BBJJ2qQQ222 d3K@uMMMMM	N	N 	N 	N 	N 	N 	N 	N 	N 	N 	N 	N 	N 	N 	N 	N s   +BH99H= H=Nc                    t          |            t          |dt          j                  }|j        d         }|*t          |dt          j                  }|j        d         }n|}t          j        ||ft          j                  }t          | j                  D ]c}| j	        |         |dd|f                  }|| j	        |         |dd|f                  }n|}|t          ||| j                  | j        z  z  }d|S )am  Compute the VDM distance pairwise.

        Parameters
        ----------
        X : ndarray of shape (n_samples, n_features), dtype=np.int32
            The input data. The data are expected to be encoded with a
            :class:`~sklearn.preprocessing.OrdinalEncoder`.

        Y : ndarray of shape (n_samples, n_features), dtype=np.int32
            The input data. The data are expected to be encoded with a
            :class:`~sklearn.preprocessing.OrdinalEncoder`.

        Returns
        -------
        distance_matrix : ndarray of shape (n_samples, n_samples)
            The VDM pairwise distance.
        T)r"   r!   r   Nr%   )p)r   r   r'   r5   r&   zerosr*   r>   r:   r=   r   r   r   )	r   r   Yn_samples_Xn_samples_YdistancerE   proba_feature_Xproba_feature_Ys	            r   pairwisezValueDifferenceMetric.pairwise   s   $ 	t28DDDgaj=A4rxHHHA'!*KK%K8;"<BJOOO !455 	 	K"3K@111k>ARSO}"&"7"DQqqq+~EV"W"1DFKKKtvUHH r   c                 
    ddiS )Nrequires_positive_XT )r   s    r   
_more_tagsz ValueDifferenceMetric._more_tags   s    !4
 	
r   c                 `    t                                                      }d|j        _        |S )NT)super__sklearn_tags__
input_tagspositive_only)r   tags	__class__s     r   rY   z&ValueDifferenceMetric.__sklearn_tags__   s'    ww''))(,%r   r   )__name__
__module____qualname____doc__r   numbersIntegralr   dict__annotations__r   r
   rH   rR   rV   rY   __classcell__)r]   s   @r   r   r      s         d dN $VH--|<$ $D    (.a     
 \555: : 65:x& & & &P
 
 

        r   r   )ra   rb   numpyr'   scipy.spatialr   sklearn.baser   sklearn.utilsr   sklearn.utils._param_validationr   sklearn.utils.multiclassr   sklearn.utils.validationr   utils._sklearn_compatr
   r   r   r   rU   r   r   <module>ro      s    . .
      ) ) ) ) ) ) & & & & & & 1 1 1 1 1 1 6 6 6 6 6 6 2 2 2 2 2 2 4 4 4 4 4 4 L L L L L L L L L L_ _ _ _ _M _ _ _ _ _r   