
"""Bagging classifier trained on balanced bootstrap samples."""

import copy
import numbers

import numpy as np
from sklearn.base import clone
from sklearn.ensemble import BaggingClassifier
from sklearn.tree import DecisionTreeClassifier
from sklearn.utils._param_validation import HasMethods, Interval, StrOptions
from sklearn.utils.fixes import parse_version

from ..pipeline import Pipeline
from ..under_sampling import RandomUnderSampler
from ..under_sampling.base import BaseUnderSampler
from ..utils import Substitution, check_sampling_strategy, check_target_type
from ..utils._docstring import _n_jobs_docstring, _random_state_docstring
from ..utils._sklearn_compat import _fit_context, sklearn_version
from ._common import _bagging_parameter_constraints


@Substitution(
    sampling_strategy=BaseUnderSampler._sampling_strategy_docstring,
    n_jobs=_n_jobs_docstring,
    random_state=_random_state_docstring,
)
class BalancedBaggingClassifier(BaggingClassifier):
    """A Bagging classifier with additional balancing.

    This implementation of Bagging is similar to the scikit-learn
    implementation. It includes an additional step to balance the training set
    at fit time using a given sampler.

    This classifier can serve as a basis to implement various methods such as
    Exactly Balanced Bagging [6]_, Roughly Balanced Bagging [7]_,
    Over-Bagging [6]_, or SMOTE-Bagging [8]_.

    Read more in the :ref:`User Guide <bagging>`.

    Parameters
    ----------
    estimator : estimator object, default=None
        The base estimator to fit on random subsets of the dataset.
        If None, then the base estimator is a decision tree.

        .. versionadded:: 0.10

    n_estimators : int, default=10
        The number of base estimators in the ensemble.

    max_samples : int or float, default=1.0
        The number of samples to draw from X to train each base estimator.

        - If int, then draw ``max_samples`` samples.
        - If float, then draw ``max_samples * X.shape[0]`` samples.

    max_features : int or float, default=1.0
        The number of features to draw from X to train each base estimator.

        - If int, then draw ``max_features`` features.
        - If float, then draw ``max_features * X.shape[1]`` features.

    bootstrap : bool, default=True
        Whether samples are drawn with replacement.

        .. note::
           Note that this bootstrap will be generated from the resampled
           dataset.

    bootstrap_features : bool, default=False
        Whether features are drawn with replacement.

    oob_score : bool, default=False
        Whether to use out-of-bag samples to estimate
        the generalization error.

    warm_start : bool, default=False
        When set to True, reuse the solution of the previous call to fit
        and add more estimators to the ensemble, otherwise, just fit
        a whole new ensemble.

    {sampling_strategy}

    replacement : bool, default=False
        Whether to sample with replacement when `sampler is None`,
        corresponding to a
        :class:`~imblearn.under_sampling.RandomUnderSampler`.

    {n_jobs}

    {random_state}

    verbose : int, default=0
        Controls the verbosity of the building process.

    sampler : sampler object, default=None
        The sampler used to balance the dataset before bootstrapping
        (if `bootstrap=True`) and fitting a base estimator. By default, a
        :class:`~imblearn.under_sampling.RandomUnderSampler` is used.

        .. versionadded:: 0.8

    Attributes
    ----------
    estimator_ : estimator
        The base estimator from which the ensemble is grown.

        .. versionadded:: 0.10

    estimators_ : list of estimators
        The collection of fitted base estimators.

    sampler_ : sampler object
        The validated sampler created from the `sampler` parameter.

    estimators_samples_ : list of ndarray
        The subset of drawn samples (i.e., the in-bag samples) for each base
        estimator. Each subset is defined by a boolean mask.

    estimators_features_ : list of ndarray
        The subset of drawn features for each base estimator.

    classes_ : ndarray of shape (n_classes,)
        The class labels.

    n_classes_ : int or list
        The number of classes.

    oob_score_ : float
        Score of the training dataset obtained using an out-of-bag estimate.

    oob_decision_function_ : ndarray of shape (n_samples, n_classes)
        Decision function computed with out-of-bag estimate on the training
        set. If `n_estimators` is small, it is possible that a data point
        was never left out during the bootstrap. In this case,
        ``oob_decision_function_`` might contain NaN.

    n_features_in_ : int
        Number of features in the input dataset.

        .. versionadded:: 0.9

    feature_names_in_ : ndarray of shape (`n_features_in_`,)
        Names of features seen during `fit`. Defined only when `X` has feature
        names that are all strings.

        .. versionadded:: 0.9

    See Also
    --------
    BalancedRandomForestClassifier : Random forest applying random
        under-sampling to balance the different bootstraps.

    EasyEnsembleClassifier : Ensemble of AdaBoost classifier trained on
        balanced bootstraps.

    RUSBoostClassifier : AdaBoost classifier where each bootstrap is balanced
        using random under-sampling at each round of boosting.

    Notes
    -----
    It is possible to turn this classifier into a balanced random forest [5]_
    by passing a :class:`~sklearn.tree.DecisionTreeClassifier` with
    `max_features='sqrt'` as a base estimator (see the sketch below).

    See
    :ref:`sphx_glr_auto_examples_ensemble_plot_comparison_ensemble_classifier.py`.
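
    A minimal sketch of that configuration (the number of estimators is an
    arbitrary illustrative value; ``max_features="sqrt"`` draws a random
    subset of features at each split, which is what the former ``'auto'``
    option meant for classifiers in older scikit-learn versions)::

        from sklearn.tree import DecisionTreeClassifier
        from imblearn.ensemble import BalancedBaggingClassifier

        balanced_rf_like = BalancedBaggingClassifier(
            estimator=DecisionTreeClassifier(max_features="sqrt"),
            n_estimators=100,
            random_state=0,
        )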

    References
    ----------
    .. [1] L. Breiman, "Pasting small votes for classification in large
           databases and on-line", Machine Learning, 36(1), 85-103, 1999.

    .. [2] L. Breiman, "Bagging predictors", Machine Learning, 24(2), 123-140,
           1996.

    .. [3] T. Ho, "The random subspace method for constructing decision
           forests", Pattern Analysis and Machine Intelligence, 20(8), 832-844,
           1998.

    .. [4] G. Louppe and P. Geurts, "Ensembles on Random Patches", Machine
           Learning and Knowledge Discovery in Databases, 346-361, 2012.

    .. [5] C. Chen, A. Liaw, and L. Breiman. "Using random forest to
           learn imbalanced data." University of California, Berkeley 110,
           2004.

    .. [6] R. Maclin, and D. Opitz. "An empirical evaluation of bagging and
           boosting." AAAI/IAAI 1997 (1997): 546-551.

    .. [7] S. Hido, H. Kashima, and Y. Takahashi. "Roughly balanced bagging
           for imbalanced data." Statistical Analysis and Data Mining: The ASA
           Data Science Journal 2.5-6 (2009): 412-426.

    .. [8] S. Wang, and X. Yao. "Diversity analysis on imbalanced data sets by
           using ensemble models." 2009 IEEE symposium on computational
           intelligence and data mining. IEEE, 2009.

    Examples
    --------
    >>> from collections import Counter
    >>> from sklearn.datasets import make_classification
    >>> from sklearn.model_selection import train_test_split
    >>> from sklearn.metrics import confusion_matrix
    >>> from imblearn.ensemble import BalancedBaggingClassifier
    >>> X, y = make_classification(n_classes=2, class_sep=2,
    ... weights=[0.1, 0.9], n_informative=3, n_redundant=1, flip_y=0,
    ... n_features=20, n_clusters_per_class=1, n_samples=1000, random_state=10)
    >>> print('Original dataset shape %s' % Counter(y))
    Original dataset shape Counter({{1: 900, 0: 100}})
    >>> X_train, X_test, y_train, y_test = train_test_split(X, y,
    ...                                                     random_state=0)
    >>> bbc = BalancedBaggingClassifier(random_state=42)
    >>> bbc.fit(X_train, y_train)
    BalancedBaggingClassifier(...)
    >>> y_pred = bbc.predict(X_test)
    >>> print(confusion_matrix(y_test, y_pred))
    [[ 23   0]
     [  2 225]]
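
    A different sampler can be used to balance each bootstrap. As a sketch
    (constructed but not fitted here), random over-sampling instead of the
    default random under-sampling:

    >>> from imblearn.over_sampling import RandomOverSampler
    >>> bbc_over = BalancedBaggingClassifier(
    ...     sampler=RandomOverSampler(), n_estimators=10, random_state=42
    ... )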
    z1.4r   r   right)closed>   not majoritynot minorityallautomajoritybooleanfit_resampleN)r   replacementsamplerbase_estimator
   g      ?TFr!   )max_samplesmax_features	bootstrapbootstrap_features	oob_score
warm_startr   r%   r   r   verboser&   c                    t                                          ||||||||||
  
         || _        |	| _        |
| _        || _        d S )N)
n_estimatorsr)   r*   r+   r,   r-   r.   r   r   r/   )super__init__	estimatorr   r%   r&   )selfr4   r1   r)   r*   r+   r,   r-   r.   r   r%   r   r   r/   r&   	__class__s                  Z/var/www/html/test/jupyter/venv/lib/python3.11/site-packages/imblearn/ensemble/_bagging.pyr3   z"BalancedBaggingClassifier.__init__   sh    $ 	%#%1!% 	 	
 	
 	
 #!2&    c                 @    t                                          |          }t           j        t                    rT j        j        dk    rD fdt           j        | j        j                                                  D              _	        n j         _	        |S )Nbypassc                 j    i | ]/\  }}t          j        j        |k              d          d          |0S )r   )npwhereclasses_).0keyvaluer5   s      r7   
<dictcomp>z9BalancedBaggingClassifier._validate_y.<locals>.<dictcomp>!  sJ     ' ' 'C #-..q1!4e' ' 'r8   )
r2   _validate_y
isinstancer   dictsampler__sampling_typer   items_sampling_strategy)r5   y	y_encodedr6   s   `  r7   rC   z%BalancedBaggingClassifier._validate_y  s    GG''**	t-t44	=,88' ' ' '"9*M0# # %''' ' 'D## '+&<D#r8   c                     | j         t          | j                   }nt          |          }| j        j        dk    r | j                            | j                   t          d| j        fd|fg          | _        dS )zZCheck the estimator and the n_estimator attribute, set the
        `estimator_` attribute.Nr:   )r   r&   
classifier)r4   r   rF   rG   
set_paramsrI   r   
estimator_)r5   defaultr4   s      r7   _validate_estimatorz-BalancedBaggingClassifier._validate_estimator-  s}     >%dn--IIgI='833M$$t7N$OOO"',	)BC
 
r8   )prefer_skip_nested_validationc                 p    |                                   t                                          ||          S )a+  Build a Bagging ensemble of estimators from the training set (X, y).

        Parameters
        ----------
        X : {array-like, sparse matrix} of shape (n_samples, n_features)
            The training input samples. Sparse matrices are accepted only if
            they are supported by the base estimator.

        y : array-like of shape (n_samples,)
            The target values (class labels in classification, real numbers in
            regression).

        Returns
        -------
        self : object
            Fitted estimator.
        )_validate_paramsr2   fit)r5   XrJ   r6   s      r7   rU   zBalancedBaggingClassifier.fit<  s/    ( 	ww{{1a   r8   c                     t          |           | j        t          | j                  | _        nt          | j                  | _        t                                          ||| j                  S )N)r%   )	r   r&   r   r%   rF   r   r2   _fitr)   )r5   rV   rJ   r)   	max_depthsample_weightr6   s         r7   rX   zBalancedBaggingClassifier._fitS  sk    ! <. ,  DMM "$,//DM ww||Aq$"2333r8   c                 >    t          | j        j         d          }|)z2Attribute for older sklearn version compatibility.z+ object has no attribute 'base_estimator_'.)AttributeErrorr6   __name__)r5   errors     r7   base_estimator_z)BalancedBaggingClassifier.base_estimator_b  s+     ~&SSS
 
 r8   c                     t                                                      }d}d}d}||v r|||         |<   n||i||<   |S )N_xfail_checkscheck_estimators_nan_infz9Fails because the sampler removed infinity and NaN values)r2   
_more_tags)r5   tagstags_keyfailing_testreasonr6   s        r7   rc   z$BalancedBaggingClassifier._more_tagsj  sV    ww!!##"1Lt+1DN<((*F3DNr8   c                 H    t                                                      }|S )N)r2   __sklearn_tags__)r5   rd   r6   s     r7   ri   z*BalancedBaggingClassifier.__sklearn_tags__u  s    ww''))r8   )Nr(   )NNN)r]   
__module____qualname____doc__r   r	   copydeepcopyr   _parameter_constraintsr   updater   numbersRealr   rE   callabler   r3   rC   r   rQ   r   rU   rX   propertyr_   rc   ri   __classcell__)r6   s   @r7   r   r      s.       @ @F --....!./@/W!X!X!./M!N!N!! q!G<<<
VVVWW	" &;"
N#344d;		
 		
   111"#34 !
   !! ! ! ! ! ! !F    $ +A*@*B*B 
 
 
 
 \666! ! ! ! 76!,4 4 4 4 4 4   X	 	 	 	 	        r8   r   )%rl   rm   rq   numpyr<   sklearn.baser   sklearn.ensembler   sklearn.treer   sklearn.utils._param_validationr   r   r   sklearn.utils.fixesr	   pipeliner   under_samplingr   under_sampling.baser   utilsr   r   r   utils._docstringr   r   utils._sklearn_compatr   r   _commonr   _sampling_strategy_docstringr    r8   r7   <module>r      s   ? ?             . . . . . . / / / / / / L L L L L L L L L L - - - - - -       / / / / / / 2 2 2 2 2 2 L L L L L L L L L L I I I I I I I I A A A A A A A A 3 3 3 3 3 3 &C(  
X X X X X 1 X X 
X X Xr8   