
    0Ph             
          d Z ddlZddlZddlmZmZ ddlmZ ddlm	Z	 ddl
mZ ddlmZmZ ddlmZmZ ddlZdd	lmZ d
dlmZmZmZmZ d
dlmZmZmZ d
dlm Z m!Z!m"Z" d
dl#m$Z$ d
dl%m&Z& d
dl'm(Z( d
dl)m*Z*m+Z+m,Z, g dZ- G d d          Z. G d de&          Z/ G d de&e          Z0 G d de.e0          Z1 G d de.e0          Z2 G d de0e          Z3 G d  d!e.e3          Z4 G d" d#e/e3          Z5 G d$ d%e3          Z6 G d& d'e/e3          Z7 G d( d)e3          Z8 G d* d+e/e0          Z9 G d, d-e/e0          Z: G d. d/e&e          Z; G d0 d1e.e;          Z< G d2 d3e.e;          Z= G d4 d5e&e          Z> G d6 d7e.e>          Z? G d8 d9e/e>          Z@ G d: d;e>          ZAdTd<ZB G d= d>e0          ZC G d? d@e0          ZDdUdBdCdDZE e" e e!ddEdFG           e ejF        dEddHG          dg e e!ddEdFG           e ejF        dEddHG          dgdIgdJgdKdgdLdMN          ddddMddLdO            ZG eHeGdPdB           deIfdQZJdR ZKdS ZLdS )Vz
The :mod:`sklearn.model_selection._split` module includes classes and
functions to split the data based on a preset strategy.
    N)ABCMetaabstractmethod)defaultdict)Iterable)	signature)chaincombinations)ceilfloor)comb   )_safe_indexingcheck_random_state	indexablemetadata_routing)_convert_to_numpyensure_common_namespace_deviceget_namespace)Interval
RealNotIntvalidate_params)_approximate_mode)_MetadataRequester)type_of_target)_num_samplescheck_arraycolumn_or_1d)BaseCrossValidatorKFold
GroupKFoldLeaveOneGroupOutLeaveOneOutLeavePGroupsOut	LeavePOutRepeatedStratifiedKFoldRepeatedKFoldShuffleSplitGroupShuffleSplitStratifiedKFoldStratifiedGroupKFoldStratifiedShuffleSplitPredefinedSplittrain_test_splitcheck_cvc                   $     e Zd ZdZd fd	Z xZS )_UnsupportedGroupCVMixinz/Mixin for splitters that do not support Groups.Nc                     |'t          j        d| j        j         t                     t                                          |||          S )a  Generate indices to split data into training and test set.

        Parameters
        ----------
        X : array-like of shape (n_samples, n_features)
            Training data, where `n_samples` is the number of samples
            and `n_features` is the number of features.

        y : array-like of shape (n_samples,)
            The target variable for supervised learning problems.

        groups : object
            Always ignored, exists for compatibility.

        Yields
        ------
        train : ndarray
            The training set indices for that split.

        test : ndarray
            The testing set indices for that split.
        N#The groups parameter is ignored by groups)warningswarn	__class____name__UserWarningsupersplitselfXyr4   r7   s       ^/var/www/html/test/jupyter/venv/lib/python3.11/site-packages/sklearn/model_selection/_split.pyr;   z_UnsupportedGroupCVMixin.split>   sP    . MOdn6MOO   ww}}Q&}111    NN)r8   
__module____qualname____doc__r;   __classcell__r7   s   @r@   r0   r0   ;   sC        992 2 2 2 2 2 2 2 2 2rA   r0   c                       e Zd ZdZddiZdS )GroupsConsumerMixinzA Mixin to ``groups`` by default.

    This Mixin makes the object to request ``groups`` by default as ``True``.

    .. versionadded:: 1.3
    r4   TN)r8   rC   rD   rE   -_GroupsConsumerMixin__metadata_request__split rA   r@   rI   rI   ]   s'          "*4 0rA   rI   c                   Z    e Zd ZdZdej        iZd	dZd
dZd
dZ	e
d
d            Zd ZdS )r   zvBase class for all cross-validators.

    Implementations must define `_iter_test_masks` or `_iter_test_indices`.
    r4   Nc              #      K   t          |||          \  }}}t          j        t          |                    }|                     |||          D ]*}|t          j        |                   }||         }||fV  +dS )a  Generate indices to split data into training and test set.

        Parameters
        ----------
        X : array-like of shape (n_samples, n_features)
            Training data, where `n_samples` is the number of samples
            and `n_features` is the number of features.

        y : array-like of shape (n_samples,)
            The target variable for supervised learning problems.

        groups : array-like of shape (n_samples,), default=None
            Group labels for the samples used while splitting the dataset into
            train/test set.

        Yields
        ------
        train : ndarray
            The training set indices for that split.

        test : ndarray
            The testing set indices for that split.
        N)r   nparanger   _iter_test_maskslogical_not)r=   r>   r?   r4   indices
test_indextrain_indexs          r@   r;   zBaseCrossValidator.splitt   s      0 !Av..1f)LOO,,//1f== 	* 	*J!"."<"<=K ,Jz)))))	* 	*rA   c              #      K   |                      |||          D ]3}t          j        t          |          t                    }d||<   |V  4dS )zGenerates boolean masks corresponding to test sets.

        By default, delegates to _iter_test_indices(X, y, groups)
        dtypeTN)_iter_test_indicesrN   zerosr   bool)r=   r>   r?   r4   rS   	test_masks         r@   rP   z#BaseCrossValidator._iter_test_masks   sb      
 11!Q?? 	 	Ja===I$(Ij!OOOO	 	rA   c                     t           )z5Generates integer indices corresponding to test sets.)NotImplementedErrorr=   r>   r?   r4   s       r@   rX   z%BaseCrossValidator._iter_test_indices   s    !!rA   c                     dS )zBReturns the number of splitting iterations in the cross-validator.NrK   r^   s       r@   get_n_splitszBaseCrossValidator.get_n_splits   s      rA   c                      t          |           S N_build_reprr=   s    r@   __repr__zBaseCrossValidator.__repr__       4   rA   rB   NNN)r8   rC   rD   rE   r   UNUSED,_BaseCrossValidator__metadata_request__splitr;   rP   rX   r   r`   rf   rK   rA   r@   r   r   h   s          "*+;+B C* * * *B   " " " " Q Q Q ^Q! ! ! ! !rA   r   )	metaclassc                   "    e Zd ZdZddZddZdS )r"   a  Leave-One-Out cross-validator.

    Provides train/test indices to split data in train/test sets. Each
    sample is used once as a test set (singleton) while the remaining
    samples form the training set.

    Note: ``LeaveOneOut()`` is equivalent to ``KFold(n_splits=n)`` and
    ``LeavePOut(p=1)`` where ``n`` is the number of samples.

    Due to the high number of test sets (which is the same as the
    number of samples) this cross-validation method can be very costly.
    For large datasets one should favor :class:`KFold`, :class:`ShuffleSplit`
    or :class:`StratifiedKFold`.

    Read more in the :ref:`User Guide <leave_one_out>`.

    Examples
    --------
    >>> import numpy as np
    >>> from sklearn.model_selection import LeaveOneOut
    >>> X = np.array([[1, 2], [3, 4]])
    >>> y = np.array([1, 2])
    >>> loo = LeaveOneOut()
    >>> loo.get_n_splits(X)
    2
    >>> print(loo)
    LeaveOneOut()
    >>> for i, (train_index, test_index) in enumerate(loo.split(X)):
    ...     print(f"Fold {i}:")
    ...     print(f"  Train: index={train_index}")
    ...     print(f"  Test:  index={test_index}")
    Fold 0:
      Train: index=[1]
      Test:  index=[0]
    Fold 1:
      Train: index=[0]
      Test:  index=[1]

    See Also
    --------
    LeaveOneGroupOut : For splitting the data according to explicit,
        domain-specific stratification of the dataset.
    GroupKFold : K-fold iterator variant with non-overlapping groups.
    Nc                     t          |          }|dk    r"t          d                    |                    t          |          S )N   z-Cannot perform LeaveOneOut with n_samples={}.)r   
ValueErrorformatrange)r=   r>   r?   r4   	n_sampless        r@   rX   zLeaveOneOut._iter_test_indices   sI     OO	>>?FFyQQ   YrA   c                 B    |t          d          t          |          S )aH  Returns the number of splitting iterations in the cross-validator.

        Parameters
        ----------
        X : array-like of shape (n_samples, n_features)
            Training data, where `n_samples` is the number of samples
            and `n_features` is the number of features.

        y : object
            Always ignored, exists for compatibility.

        groups : object
            Always ignored, exists for compatibility.

        Returns
        -------
        n_splits : int
            Returns the number of splitting iterations in the cross-validator.
        N%The 'X' parameter should not be None.)ro   r   r^   s       r@   r`   zLeaveOneOut.get_n_splits   s$    ( 9DEEEArA   rB   )r8   rC   rD   rE   rX   r`   rK   rA   r@   r"   r"      sG        + +Z            rA   r"   c                   (    e Zd ZdZd ZddZddZdS )r$   a  Leave-P-Out cross-validator.

    Provides train/test indices to split data in train/test sets. This results
    in testing on all distinct samples of size p, while the remaining n - p
    samples form the training set in each iteration.

    Note: ``LeavePOut(p)`` is NOT equivalent to
    ``KFold(n_splits=n_samples // p)`` which creates non-overlapping test sets.

    Due to the high number of iterations which grows combinatorically with the
    number of samples this cross-validation method can be very costly. For
    large datasets one should favor :class:`KFold`, :class:`StratifiedKFold`
    or :class:`ShuffleSplit`.

    Read more in the :ref:`User Guide <leave_p_out>`.

    Parameters
    ----------
    p : int
        Size of the test sets. Must be strictly less than the number of
        samples.

    Examples
    --------
    >>> import numpy as np
    >>> from sklearn.model_selection import LeavePOut
    >>> X = np.array([[1, 2], [3, 4], [5, 6], [7, 8]])
    >>> y = np.array([1, 2, 3, 4])
    >>> lpo = LeavePOut(2)
    >>> lpo.get_n_splits(X)
    6
    >>> print(lpo)
    LeavePOut(p=2)
    >>> for i, (train_index, test_index) in enumerate(lpo.split(X)):
    ...     print(f"Fold {i}:")
    ...     print(f"  Train: index={train_index}")
    ...     print(f"  Test:  index={test_index}")
    Fold 0:
      Train: index=[2 3]
      Test:  index=[0 1]
    Fold 1:
      Train: index=[1 3]
      Test:  index=[0 2]
    Fold 2:
      Train: index=[1 2]
      Test:  index=[0 3]
    Fold 3:
      Train: index=[0 3]
      Test:  index=[1 2]
    Fold 4:
      Train: index=[0 2]
      Test:  index=[1 3]
    Fold 5:
      Train: index=[0 1]
      Test:  index=[2 3]
    c                     || _         d S rb   )p)r=   rw   s     r@   __init__zLeavePOut.__init__4  s    rA   Nc              #     K   t          |          }|| j        k    r(t          d                    | j        |                    t	          t          |          | j                  D ]}t          j        |          V  d S )Nz8p={} must be strictly less than the number of samples={})r   rw   ro   rp   r	   rq   rN   array)r=   r>   r?   r4   rr   combinations         r@   rX   zLeavePOut._iter_test_indices7  s       OO	JQQFI   
 (i(8(8$&AA 	( 	(K(;''''''	( 	(rA   c                     |t          d          t          t          t          |          | j        d                    S )a  Returns the number of splitting iterations in the cross-validator.

        Parameters
        ----------
        X : array-like of shape (n_samples, n_features)
            Training data, where `n_samples` is the number of samples
            and `n_features` is the number of features.

        y : object
            Always ignored, exists for compatibility.

        groups : object
            Always ignored, exists for compatibility.
        Nrt   Texact)ro   intr   r   rw   r^   s       r@   r`   zLeavePOut.get_n_splitsB  s=     9DEEE4Qt<<<===rA   rB   )r8   rC   rD   rE   rx   rX   r`   rK   rA   r@   r$   r$      sV        7 7r  	( 	( 	( 	(> > > > > >rA   r$   c                   B     e Zd ZdZed             Zd fd	ZddZ xZS )
_BaseKFoldz;Base class for K-Fold cross-validators and TimeSeriesSplit.c                   t          |t          j                  s#t          d|dt	          |          d          t          |          }|dk    r"t          d                    |                    t          |t                    s"t          d                    |                    |s|t          d          || _	        || _
        || _        d S )Nz.The number of folds must be of Integral type. z	 of type z was passed.rn   zok-fold cross-validation requires at least one train/test split by setting n_splits=2 or more, got n_splits={0}.z&shuffle must be True or False; got {0}zSetting a random_state has no effect since shuffle is False. You should leave random_state to its default (None), or set shuffle=True.)
isinstancenumbersIntegralro   typer   rp   rZ   	TypeErrorn_splitsshufflerandom_state)r=   r   r   r   s       r@   rx   z_BaseKFold.__init__Y  s    (G$455 	*/7xxhI   x==q==%%+VH%5%5   '4(( 	VDKKGTTUUU 	<3O   !(rA   Nc              #     K   t          |||          \  }}}t          |          }| j        |k    r(t          d                    | j        |                    t                                          |||          D ]\  }}||fV  dS )a  Generate indices to split data into training and test set.

        Parameters
        ----------
        X : array-like of shape (n_samples, n_features)
            Training data, where `n_samples` is the number of samples
            and `n_features` is the number of features.

        y : array-like of shape (n_samples,), default=None
            The target variable for supervised learning problems.

        groups : array-like of shape (n_samples,), default=None
            Group labels for the samples used while splitting the dataset into
            train/test set.

        Yields
        ------
        train : ndarray
            The training set indices for that split.

        test : ndarray
            The testing set indices for that split.
        z\Cannot have number of splits n_splits={0} greater than the number of samples: n_samples={1}.N)r   r   r   ro   rp   r:   r;   )r=   r>   r?   r4   rr   traintestr7   s          r@   r;   z_BaseKFold.splity  s      0 !Av..1f OO	=9$$B&	22	   !77==Av66 	 	KE4+	 	rA   c                     | j         S a  Returns the number of splitting iterations in the cross-validator.

        Parameters
        ----------
        X : object
            Always ignored, exists for compatibility.

        y : object
            Always ignored, exists for compatibility.

        groups : object
            Always ignored, exists for compatibility.

        Returns
        -------
        n_splits : int
            Returns the number of splitting iterations in the cross-validator.
        r   r^   s       r@   r`   z_BaseKFold.get_n_splits      & }rA   rB   rh   )	r8   rC   rD   rE   r   rx   r;   r`   rF   rG   s   @r@   r   r   V  sq        EE) ) ^)># # # # # #J       rA   r   c                   4     e Zd ZdZdddd fdZd	dZ xZS )
r   aB
  K-Fold cross-validator.

    Provides train/test indices to split data in train/test sets. Split
    dataset into k consecutive folds (without shuffling by default).

    Each fold is then used once as a validation while the k - 1 remaining
    folds form the training set.

    Read more in the :ref:`User Guide <k_fold>`.

    For visualisation of cross-validation behaviour and
    comparison between common scikit-learn split methods
    refer to :ref:`sphx_glr_auto_examples_model_selection_plot_cv_indices.py`

    Parameters
    ----------
    n_splits : int, default=5
        Number of folds. Must be at least 2.

        .. versionchanged:: 0.22
            ``n_splits`` default value changed from 3 to 5.

    shuffle : bool, default=False
        Whether to shuffle the data before splitting into batches.
        Note that the samples within each split will not be shuffled.

    random_state : int, RandomState instance or None, default=None
        When `shuffle` is True, `random_state` affects the ordering of the
        indices, which controls the randomness of each fold. Otherwise, this
        parameter has no effect.
        Pass an int for reproducible output across multiple function calls.
        See :term:`Glossary <random_state>`.

    Examples
    --------
    >>> import numpy as np
    >>> from sklearn.model_selection import KFold
    >>> X = np.array([[1, 2], [3, 4], [1, 2], [3, 4]])
    >>> y = np.array([1, 2, 3, 4])
    >>> kf = KFold(n_splits=2)
    >>> kf.get_n_splits(X)
    2
    >>> print(kf)
    KFold(n_splits=2, random_state=None, shuffle=False)
    >>> for i, (train_index, test_index) in enumerate(kf.split(X)):
    ...     print(f"Fold {i}:")
    ...     print(f"  Train: index={train_index}")
    ...     print(f"  Test:  index={test_index}")
    Fold 0:
      Train: index=[2 3]
      Test:  index=[0 1]
    Fold 1:
      Train: index=[0 1]
      Test:  index=[2 3]

    Notes
    -----
    The first ``n_samples % n_splits`` folds have size
    ``n_samples // n_splits + 1``, other folds have size
    ``n_samples // n_splits``, where ``n_samples`` is the number of samples.

    Randomized CV splitters may return different results for each call of
    split. You can make the results identical by setting `random_state`
    to an integer.

    See Also
    --------
    StratifiedKFold : Takes class information into account to avoid building
        folds with imbalanced class distributions (for binary or multiclass
        classification tasks).

    GroupKFold : K-fold iterator variant with non-overlapping groups.

    RepeatedKFold : Repeats K-Fold n times.
       FNr   r   c                P    t                                          |||           d S N)r   r   r   r:   rx   r=   r   r   r   r7   s       r@   rx   zKFold.__init__  (    (G,WWWWWrA   c              #   Z  K   t          |          }t          j        |          }| j        r't	          | j                                      |           | j        }t          j        |||z  t                    }|d ||z  xx         dz  cc<   d}|D ]}	|||	z   }}
||
|         V  |}d S )NrV   rn   r   )	r   rN   rO   r   r   r   r   fullr   )r=   r>   r?   r4   rr   rR   r   
fold_sizescurrent	fold_sizestartstops               r@   rX   zKFold._iter_test_indices  s       OO	)I&&< 	Ct01199'BBB=WXyH'<CHHH
)Y))***a/***# 	 	I!7Y#64E%*%%%%GG	 	rA   r   rB   )r8   rC   rD   rE   rx   rX   rF   rG   s   @r@   r   r     st        J JXXe$ X X X X X X X       rA   r   c                   >     e Zd ZdZd	ddd fdZd Zd
 fd	Z xZS )r    a
  K-fold iterator variant with non-overlapping groups.

    Each group will appear exactly once in the test set across all folds (the
    number of distinct groups has to be at least equal to the number of folds).

    The folds are approximately balanced in the sense that the number of
    samples is approximately the same in each test fold when `shuffle` is True.

    Read more in the :ref:`User Guide <group_k_fold>`.

    For visualisation of cross-validation behaviour and
    comparison between common scikit-learn split methods
    refer to :ref:`sphx_glr_auto_examples_model_selection_plot_cv_indices.py`

    Parameters
    ----------
    n_splits : int, default=5
        Number of folds. Must be at least 2.

        .. versionchanged:: 0.22
            ``n_splits`` default value changed from 3 to 5.

    shuffle : bool, default=False
        Whether to shuffle the groups before splitting into batches.
        Note that the samples within each split will not be shuffled.

        .. versionadded:: 1.6

    random_state : int, RandomState instance or None, default=None
        When `shuffle` is True, `random_state` affects the ordering of the
        indices, which controls the randomness of each fold. Otherwise, this
        parameter has no effect.
        Pass an int for reproducible output across multiple function calls.
        See :term:`Glossary <random_state>`.

        .. versionadded:: 1.6

    Notes
    -----
    Groups appear in an arbitrary order throughout the folds.

    Examples
    --------
    >>> import numpy as np
    >>> from sklearn.model_selection import GroupKFold
    >>> X = np.array([[1, 2], [3, 4], [5, 6], [7, 8], [9, 10], [11, 12]])
    >>> y = np.array([1, 2, 3, 4, 5, 6])
    >>> groups = np.array([0, 0, 2, 2, 3, 3])
    >>> group_kfold = GroupKFold(n_splits=2)
    >>> group_kfold.get_n_splits(X, y, groups)
    2
    >>> print(group_kfold)
    GroupKFold(n_splits=2, random_state=None, shuffle=False)
    >>> for i, (train_index, test_index) in enumerate(group_kfold.split(X, y, groups)):
    ...     print(f"Fold {i}:")
    ...     print(f"  Train: index={train_index}, group={groups[train_index]}")
    ...     print(f"  Test:  index={test_index}, group={groups[test_index]}")
    Fold 0:
      Train: index=[2 3], group=[2 2]
      Test:  index=[0 1 4 5], group=[0 0 3 3]
    Fold 1:
      Train: index=[0 1 4 5], group=[0 0 3 3]
      Test:  index=[2 3], group=[2 2]

    See Also
    --------
    LeaveOneGroupOut : For splitting the data according to explicit
        domain-specific stratification of the dataset.

    StratifiedKFold : Takes class information into account to avoid building
        folds with imbalanced class proportions (for binary or multiclass
        classification tasks).
    r   FNr   c                P    t                                          |||           d S )Nr   r   r   s       r@   rx   zGroupKFold.__init___  s(    7NNNNNrA   c              #     K   |t          d          t          |ddd           }t          j        |d          \  }}t	          |          }| j        |k    rt          d| j        |fz            | j        r{t          | j                  }|	                    |          }t          j
        || j                  }|D ]3}	t          j        ||	          }
t          j        |
          d         V  4d S t          j        |          }t          j        |          d d d	         }||         }t          j        | j                  }t          j        t	          |                    }t!          |          D ]4\  }}t          j        |          }||xx         |z  cc<   ||||         <   5||         }t%          | j                  D ]"}t          j        ||k              d         V  #d S )
N*The 'groups' parameter should not be None.r4   F
input_name	ensure_2drW   Treturn_inversezOCannot have number of splits n_splits=%d greater than the number of groups: %d.r   )ro   r   rN   uniquelenr   r   r   r   permutationarray_splitisinwherebincountargsortrY   	enumerateargminrq   )r=   r>   r?   r4   unique_groups	group_idxn_groupsrngsplit_groupstest_group_idsr[   n_samples_per_grouprR   n_samples_per_foldgroup_to_foldgroup_indexweightlightest_foldfs                      r@   rX   zGroupKFold._iter_test_indicesb  s(     >IJJJVEQUVVV#%9VD#I#I#I y}%%=8##259]H4MN  
 < !	0$T%677COOM::M>-GGL". - -GFN;;	hy))!,,,,,- - #%+i"8"8 j!455ddd;G"5g"> "$$-!8!8 HS%7%788M (11D'E'E D D#V "	*< = ="=111V;1116Cgk233#I.G4=)) 0 0hw!|,,Q/////0 0rA   c                 J    t                                          |||          S a  Generate indices to split data into training and test set.

        Parameters
        ----------
        X : array-like of shape (n_samples, n_features)
            Training data, where `n_samples` is the number of samples
            and `n_features` is the number of features.

        y : array-like of shape (n_samples,), default=None
            The target variable for supervised learning problems.

        groups : array-like of shape (n_samples,)
            Group labels for the samples used while splitting the dataset into
            train/test set.

        Yields
        ------
        train : ndarray
            The training set indices for that split.

        test : ndarray
            The testing set indices for that split.
        r:   r;   r<   s       r@   r;   zGroupKFold.split      0 ww}}Q6***rA   r   rB   )r8   rC   rD   rE   rx   rX   r;   rF   rG   s   @r@   r    r      s        H HTOe$ O O O O O O O/0 /0 /0b+ + + + + + + + + +rA   r    c                   H     e Zd ZdZd
ddd fdZddZddZd fd		Z xZS )r)   a
  Stratified K-Fold cross-validator.

    Provides train/test indices to split data in train/test sets.

    This cross-validation object is a variation of KFold that returns
    stratified folds. The folds are made by preserving the percentage of
    samples for each class.

    Read more in the :ref:`User Guide <stratified_k_fold>`.

    For visualisation of cross-validation behaviour and
    comparison between common scikit-learn split methods
    refer to :ref:`sphx_glr_auto_examples_model_selection_plot_cv_indices.py`

    Parameters
    ----------
    n_splits : int, default=5
        Number of folds. Must be at least 2.

        .. versionchanged:: 0.22
            ``n_splits`` default value changed from 3 to 5.

    shuffle : bool, default=False
        Whether to shuffle each class's samples before splitting into batches.
        Note that the samples within each split will not be shuffled.

    random_state : int, RandomState instance or None, default=None
        When `shuffle` is True, `random_state` affects the ordering of the
        indices, which controls the randomness of each fold for each class.
        Otherwise, leave `random_state` as `None`.
        Pass an int for reproducible output across multiple function calls.
        See :term:`Glossary <random_state>`.

    Examples
    --------
    >>> import numpy as np
    >>> from sklearn.model_selection import StratifiedKFold
    >>> X = np.array([[1, 2], [3, 4], [1, 2], [3, 4]])
    >>> y = np.array([0, 0, 1, 1])
    >>> skf = StratifiedKFold(n_splits=2)
    >>> skf.get_n_splits(X, y)
    2
    >>> print(skf)
    StratifiedKFold(n_splits=2, random_state=None, shuffle=False)
    >>> for i, (train_index, test_index) in enumerate(skf.split(X, y)):
    ...     print(f"Fold {i}:")
    ...     print(f"  Train: index={train_index}")
    ...     print(f"  Test:  index={test_index}")
    Fold 0:
      Train: index=[1 3]
      Test:  index=[0 2]
    Fold 1:
      Train: index=[0 2]
      Test:  index=[1 3]

    Notes
    -----
    The implementation is designed to:

    * Generate test sets such that all contain the same distribution of
      classes, or as close as possible.
    * Be invariant to class label: relabelling ``y = ["Happy", "Sad"]`` to
      ``y = [1, 0]`` should not change the indices generated.
    * Preserve order dependencies in the dataset ordering, when
      ``shuffle=False``: all samples from class k in some test set were
      contiguous in y, or separated in y by samples from classes other than k.
    * Generate test sets where the smallest and largest differ by at most one
      sample.

    .. versionchanged:: 0.22
        The previous implementation did not follow the last constraint.

    See Also
    --------
    RepeatedStratifiedKFold : Repeats Stratified K-Fold n times.
    r   FNr   c                P    t                                          |||           d S r   r   r   s       r@   rx   zStratifiedKFold.__init__  r   rA   c                     t           j                  }t          |          \  }}|rt          ||          }nt	          j        |          }t          |          }d}||vr#t          d                    ||                    t          |          }t	          j
        |dd          \  }}	}
t	          j
        |	d          \  }}||
         }t          |	          t	          j        |          }t	          j        |          }t	          j         j        |k              rt          d j        z             j        |k    r$t!          j        d| j        fz  t$                     t	          j        |          t	          j         fdt)           j                  D                       }t	          j        t          |          d	
          }t)                    D ]]}t	          j         j                                      |d d |f                   } j        r|                    |           ||||k    <   ^|S )Nbinary
multiclass1Supported target types are: {}. Got {!r} instead.T)return_indexr   r   Gn_splits=%d cannot be greater than the number of members in each class.SThe least populated class in y has only %d members, which is less than n_splits=%d.c                 X    g | ]&}t          j        |d j                           'S )N)	minlength)rN   r   r   ).0i	n_classesr=   y_orders     r@   
<listcomp>z4StratifiedKFold._make_test_folds.<locals>.<listcomp>1  sG        GA$6$679MMM  rA   r   rV   )r   r   r   r   rN   asarrayr   ro   rp   r   r   r   r   minallr   r5   r6   r9   sortrq   emptyrO   repeatr   )r=   r>   r?   r   xpis_array_apitype_of_target_yallowed_target_types_y_idxy_inv
class_perm	y_encodedy_counts
min_groups
allocation
test_foldskfolds_for_classr   r   s   `                  @@r@   _make_test_foldsz StratifiedKFold._make_test_folds  sw    !233
 )++L 	!!R((AA
1A)!,,7#777CJJ(*:    OO)ADNNN5% 	%===:u%	JJ	;y))VH%%
6$-(*++ 	47;}F   =:%%M<t}-. 	   ')$$Z     t}--  
 

 Xc!ffC000
y!! 	9 	9A !i66==jA>NOOO| -O,,,)8JyA~&&rA   c              #   t   K   |                      ||          }t          | j                  D ]
}||k    V  d S rb   )r   rq   r   )r=   r>   r?   r4   r   r   s         r@   rP   z StratifiedKFold._iter_test_masksE  sQ      **1a00
t}%% 	" 	"A/!!!!	" 	"rA   c                     |'t          j        d| j        j         t                     t          |ddd          }t                                          |||          S )m  Generate indices to split data into training and test set.

        Parameters
        ----------
        X : array-like of shape (n_samples, n_features)
            Training data, where `n_samples` is the number of samples
            and `n_features` is the number of features.

            Note that providing ``y`` is sufficient to generate the splits and
            hence ``np.zeros(n_samples)`` may be used as a placeholder for
            ``X`` instead of actual training data.

        y : array-like of shape (n_samples,)
            The target variable for supervised learning problems.
            Stratification is done based on the y labels.

        groups : object
            Always ignored, exists for compatibility.

        Yields
        ------
        train : ndarray
            The training set indices for that split.

        test : ndarray
            The testing set indices for that split.

        Notes
        -----
        Randomized CV splitters may return different results for each call of
        split. You can make the results identical by setting `random_state`
        to an integer.
        Nr2   r?   Fr   r5   r6   r7   r8   r9   r   r:   r;   r<   s       r@   r;   zStratifiedKFold.splitJ  e    D MOdn6MOO   cU$GGGww}}Q6***rA   r   rb   rB   )	r8   rC   rD   rE   rx   r   rP   r;   rF   rG   s   @r@   r)   r)     s        K KZXe$ X X X X X X XD D D DL" " " "
(+ (+ (+ (+ (+ (+ (+ (+ (+ (+rA   r)   c                   0     e Zd ZdZd fd	Zd Zd Z xZS )	r*   a  Stratified K-Fold iterator variant with non-overlapping groups.

    This cross-validation object is a variation of StratifiedKFold attempts to
    return stratified folds with non-overlapping groups. The folds are made by
    preserving the percentage of samples for each class.

    Each group will appear exactly once in the test set across all folds (the
    number of distinct groups has to be at least equal to the number of folds).

    The difference between :class:`GroupKFold`
    and `StratifiedGroupKFold` is that
    the former attempts to create balanced folds such that the number of
    distinct groups is approximately the same in each fold, whereas
    `StratifiedGroupKFold` attempts to create folds which preserve the
    percentage of samples for each class as much as possible given the
    constraint of non-overlapping groups between splits.

    Read more in the :ref:`User Guide <stratified_group_k_fold>`.

    For visualisation of cross-validation behaviour and
    comparison between common scikit-learn split methods
    refer to :ref:`sphx_glr_auto_examples_model_selection_plot_cv_indices.py`

    Parameters
    ----------
    n_splits : int, default=5
        Number of folds. Must be at least 2.

    shuffle : bool, default=False
        Whether to shuffle each class's samples before splitting into batches.
        Note that the samples within each split will not be shuffled.
        This implementation can only shuffle groups that have approximately the
        same y distribution, no global shuffle will be performed.

    random_state : int or RandomState instance, default=None
        When `shuffle` is True, `random_state` affects the ordering of the
        indices, which controls the randomness of each fold for each class.
        Otherwise, leave `random_state` as `None`.
        Pass an int for reproducible output across multiple function calls.
        See :term:`Glossary <random_state>`.

    Examples
    --------
    >>> import numpy as np
    >>> from sklearn.model_selection import StratifiedGroupKFold
    >>> X = np.ones((17, 2))
    >>> y = np.array([0, 0, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0])
    >>> groups = np.array([1, 1, 2, 2, 3, 3, 3, 4, 5, 5, 5, 5, 6, 6, 7, 8, 8])
    >>> sgkf = StratifiedGroupKFold(n_splits=3)
    >>> sgkf.get_n_splits(X, y)
    3
    >>> print(sgkf)
    StratifiedGroupKFold(n_splits=3, random_state=None, shuffle=False)
    >>> for i, (train_index, test_index) in enumerate(sgkf.split(X, y, groups)):
    ...     print(f"Fold {i}:")
    ...     print(f"  Train: index={train_index}")
    ...     print(f"         group={groups[train_index]}")
    ...     print(f"  Test:  index={test_index}")
    ...     print(f"         group={groups[test_index]}")
    Fold 0:
      Train: index=[ 0  1  2  3  7  8  9 10 11 15 16]
             group=[1 1 2 2 4 5 5 5 5 8 8]
      Test:  index=[ 4  5  6 12 13 14]
             group=[3 3 3 6 6 7]
    Fold 1:
      Train: index=[ 4  5  6  7  8  9 10 11 12 13 14]
             group=[3 3 3 4 5 5 5 5 6 6 7]
      Test:  index=[ 0  1  2  3 15 16]
             group=[1 1 2 2 8 8]
    Fold 2:
      Train: index=[ 0  1  2  3  4  5  6 12 13 14 15 16]
             group=[1 1 2 2 3 3 3 6 6 7 8 8]
      Test:  index=[ 7  8  9 10 11]
             group=[4 5 5 5 5]

    Notes
    -----
    The implementation is designed to:

    * Mimic the behavior of StratifiedKFold as much as possible for trivial
      groups (e.g. when each group contains only one sample).
    * Be invariant to class label: relabelling ``y = ["Happy", "Sad"]`` to
      ``y = [1, 0]`` should not change the indices generated.
    * Stratify based on samples as much as possible while keeping
      non-overlapping groups constraint. That means that in some cases when
      there is a small number of groups containing a large number of samples
      the stratification will not be possible and the behavior will be close
      to GroupKFold.

    See also
    --------
    StratifiedKFold: Takes class information into account to build folds which
        retain class distributions (for binary or multiclass classification
        tasks).

    GroupKFold: K-fold iterator variant with non-overlapping groups.
    r   FNc                 P    t                                          |||           d S r   r   r   s       r@   rx   zStratifiedGroupKFold.__init__  r   rA   c              #     K   t          | j                  }t          j        |          }t	          |          }d}||vr#t          d                    ||                    t          |          }t          j        |dd          \  }}}	t          j	        | j
        |	k              rt          d| j
        z            t          j        |	          }
| j
        |
k    r$t          j        d|
| j
        fz  t                     t          |	          }t          j        |dd          \  }}}t          j        t          |          |f          }t#          ||          D ]\  }}|||fxx         dz  cc<   t          j        | j
        |f          }t%          t&                    | j        r|                    |           t          j        t          j        |d           d	
          }|D ]M}||         }|                     ||	|          }||xx         |z  cc<   |                             |           Nt3          | j
                  D ]"fdt5          |          D             }|V  #d S )Nr   r   T)r   return_countsr   r   rn   axis	mergesortkind)y_counts_per_foldy_cntgroup_y_countsc                 .    g | ]\  }}|         v |S rK   rK   )r   idxr   groups_per_foldr   s      r@   r   z;StratifiedGroupKFold._iter_test_indices.<locals>.<listcomp>%  s7       "C 222 222rA   )r   r   rN   r   r   ro   rp   r   r   r   r   r   r5   r6   r9   r   rY   zipr   setr   r   std_find_best_foldaddrq   r   )r=   r>   r?   r4   r   r   r   r   r   r   n_smallest_classr   
groups_inv
groups_cnty_counts_per_group	class_idxr   r   sorted_groups_idxr   	best_foldtest_indicesr  r   s                         @@r@   rX   z'StratifiedGroupKFold._iter_test_indices  s       !!233JqMM)!,,7#777CJJ(*:    OO)Ad$OOO5%6$-%'(( 	47;}F   6%===+++M<#T]34 	   JJ	$&I4t%
 %
 %
!:z  Xs:	&BCC$'z$:$: 	: 	: Iyy)344494444HdmY%?@@%c**< 	,KK*+++ JV&Q////k
 
 
 + 	6 	6I/	:N,,"3- -  I
 i(((N:(((I&**95555t}%% 	 	A    &/
&;&;  L
 	 	rA   c                    d }t           j        }t           j        }t          | j                  D ]}||xx         |z  cc<   t          j        ||                    dd          z  d          }||xx         |z  cc<   t          j        |          }	t          j        ||                   }
|	|k     pt          j        |	|          o|
|k     }|r|	}|
}|}|S )Nrn   r   r   r   )	rN   infrq   r   r  reshapemeansumisclose)r=   r   r   r   r  min_evalmin_samples_in_foldr   std_per_class	fold_evalsamples_in_foldis_current_fold_betters               r@   r  z$StratifiedGroupKFold._find_best_fold,  s   	6 ft}%% 	 	Aa   N2   F#4u}}Q7K7K#KRSTTTMa   N2   ..I f%6q%9::OH$ ::i22 :#&99 #
 & $&5#	rA   )r   FN)r8   rC   rD   rE   rx   rX   r  rF   rG   s   @r@   r*   r*   u  sr        ` `DX X X X X XO O Ob      rA   r*   c                   <     e Zd ZdZd	dddd fdZd
dZd Z xZS )TimeSeriesSplita  Time Series cross-validator.

    Provides train/test indices to split time series data samples
    that are observed at fixed time intervals, in train/test sets.
    In each split, test indices must be higher than before, and thus shuffling
    in cross validator is inappropriate.

    This cross-validation object is a variation of :class:`KFold`.
    In the kth split, it returns first k folds as train set and the
    (k+1)th fold as test set.

    Note that unlike standard cross-validation methods, successive
    training sets are supersets of those that come before them.

    Read more in the :ref:`User Guide <time_series_split>`.

    For visualisation of cross-validation behaviour and
    comparison between common scikit-learn split methods
    refer to :ref:`sphx_glr_auto_examples_model_selection_plot_cv_indices.py`

    .. versionadded:: 0.18

    Parameters
    ----------
    n_splits : int, default=5
        Number of splits. Must be at least 2.

        .. versionchanged:: 0.22
            ``n_splits`` default value changed from 3 to 5.

    max_train_size : int, default=None
        Maximum size for a single training set.

    test_size : int, default=None
        Used to limit the size of the test set. Defaults to
        ``n_samples // (n_splits + 1)``, which is the maximum allowed value
        with ``gap=0``.

        .. versionadded:: 0.24

    gap : int, default=0
        Number of samples to exclude from the end of each train set before
        the test set.

        .. versionadded:: 0.24

    Examples
    --------
    >>> import numpy as np
    >>> from sklearn.model_selection import TimeSeriesSplit
    >>> X = np.array([[1, 2], [3, 4], [1, 2], [3, 4], [1, 2], [3, 4]])
    >>> y = np.array([1, 2, 3, 4, 5, 6])
    >>> tscv = TimeSeriesSplit()
    >>> print(tscv)
    TimeSeriesSplit(gap=0, max_train_size=None, n_splits=5, test_size=None)
    >>> for i, (train_index, test_index) in enumerate(tscv.split(X)):
    ...     print(f"Fold {i}:")
    ...     print(f"  Train: index={train_index}")
    ...     print(f"  Test:  index={test_index}")
    Fold 0:
      Train: index=[0]
      Test:  index=[1]
    Fold 1:
      Train: index=[0 1]
      Test:  index=[2]
    Fold 2:
      Train: index=[0 1 2]
      Test:  index=[3]
    Fold 3:
      Train: index=[0 1 2 3]
      Test:  index=[4]
    Fold 4:
      Train: index=[0 1 2 3 4]
      Test:  index=[5]
    >>> # Fix test_size to 2 with 12 samples
    >>> X = np.random.randn(12, 2)
    >>> y = np.random.randint(0, 2, 12)
    >>> tscv = TimeSeriesSplit(n_splits=3, test_size=2)
    >>> for i, (train_index, test_index) in enumerate(tscv.split(X)):
    ...     print(f"Fold {i}:")
    ...     print(f"  Train: index={train_index}")
    ...     print(f"  Test:  index={test_index}")
    Fold 0:
      Train: index=[0 1 2 3 4 5]
      Test:  index=[6 7]
    Fold 1:
      Train: index=[0 1 2 3 4 5 6 7]
      Test:  index=[8 9]
    Fold 2:
      Train: index=[0 1 2 3 4 5 6 7 8 9]
      Test:  index=[10 11]
    >>> # Add in a 2 period gap
    >>> tscv = TimeSeriesSplit(n_splits=3, test_size=2, gap=2)
    >>> for i, (train_index, test_index) in enumerate(tscv.split(X)):
    ...     print(f"Fold {i}:")
    ...     print(f"  Train: index={train_index}")
    ...     print(f"  Test:  index={test_index}")
    Fold 0:
      Train: index=[0 1 2 3]
      Test:  index=[6 7]
    Fold 1:
      Train: index=[0 1 2 3 4 5]
      Test:  index=[8 9]
    Fold 2:
      Train: index=[0 1 2 3 4 5 6 7]
      Test:  index=[10 11]

    For a more extended example see
    :ref:`sphx_glr_auto_examples_applications_plot_cyclical_feature_engineering.py`.

    Notes
    -----
    The training set has size ``i * n_samples // (n_splits + 1)
    + n_samples % (n_splits + 1)`` in the ``i`` th split,
    with a test set of size ``n_samples//(n_splits + 1)`` by default,
    where ``n_samples`` is the number of samples. Note that this
    formula is only valid when ``test_size`` and ``max_train_size`` are
    left to their default values.
    r   Nr   )max_train_size	test_sizegapc                z    t                                          |dd            || _        || _        || _        d S )NFr   )r:   rx   r  r  r  )r=   r   r  r  r  r7   s        r@   rx   zTimeSeriesSplit.__init__  s<    5tDDD,"rA   c                 ~    |'t          j        d| j        j         t                     |                     |          S )a  Generate indices to split data into training and test set.

        Parameters
        ----------
        X : array-like of shape (n_samples, n_features)
            Training data, where `n_samples` is the number of samples
            and `n_features` is the number of features.

        y : array-like of shape (n_samples,)
            Always ignored, exists for compatibility.

        groups : array-like of shape (n_samples,)
            Always ignored, exists for compatibility.

        Yields
        ------
        train : ndarray
            The training set indices for that split.

        test : ndarray
            The testing set indices for that split.
        Nr2   r5   r6   r7   r8   r9   _splitr^   s       r@   r;   zTimeSeriesSplit.split  sC    . MOdn6MOO   {{1~~rA   c              #   (  K   t          |          \  }t          |          }| j        }|dz   }| j        }| j        | j        n||z  }||k    rt          d| d| d          ||z
  ||z  z
  dk    rt          d| d| d	| d
| d	          t          j        |          }t          |||z  z
  ||          }|D ]T}	|	|z
  }
| j	        r-| j	        |
k     r"||
| j	        z
  |
         ||	|	|z            fV  ;|d|
         ||	|	|z            fV  UdS )a  Generate indices to split data into training and test set.

        Parameters
        ----------
        X : array-like of shape (n_samples, n_features)
            Training data, where `n_samples` is the number of samples
            and `n_features` is the number of features.

        Yields
        ------
        train : ndarray
            The training set indices for that split.

        test : ndarray
            The testing set indices for that split.
        rn   NzCannot have number of folds=z$ greater than the number of samples=.r   zToo many splits=z for number of samples=z with test_size=z	 and gap=)
r   r   r   r  r  ro   rN   rO   rq   r  )r=   r>   rr   r   n_foldsr  r  rR   test_starts
test_start	train_ends              r@   r#  zTimeSeriesSplit._split  s     " || OO	=Q,h"n8DNNi7>R 	
 Y<w < </8< < <   s?i(23q88J8 J JJ J/8J JCFJ J J  
 )I&&I9(<<iSS% 	 	J"S(I" 	t':Y'F'FI(;;iGHJi)??@     JYJ'Ji)??@    	 	rA   r   rB   )r8   rC   rD   rE   rx   r;   r#  rF   rG   s   @r@   r  r  C  s}        v vpTTq          <4 4 4 4 4 4 4rA   r  c                   2     e Zd ZdZd ZddZd fd	Z xZS )r!   a  Leave One Group Out cross-validator.

    Provides train/test indices to split data such that each training set is
    comprised of all samples except ones belonging to one specific group.
    Arbitrary domain specific group information is provided as an array of integers
    that encodes the group of each sample.

    For instance the groups could be the year of collection of the samples
    and thus allow for cross-validation against time-based splits.

    Read more in the :ref:`User Guide <leave_one_group_out>`.

    Notes
    -----
    Splits are ordered according to the index of the group left out. The first
    split has testing set consisting of the group whose index in `groups` is
    lowest, and so on.

    Examples
    --------
    >>> import numpy as np
    >>> from sklearn.model_selection import LeaveOneGroupOut
    >>> X = np.array([[1, 2], [3, 4], [5, 6], [7, 8]])
    >>> y = np.array([1, 2, 1, 2])
    >>> groups = np.array([1, 1, 2, 2])
    >>> logo = LeaveOneGroupOut()
    >>> logo.get_n_splits(X, y, groups)
    2
    >>> logo.get_n_splits(groups=groups)  # 'groups' is always required
    2
    >>> print(logo)
    LeaveOneGroupOut()
    >>> for i, (train_index, test_index) in enumerate(logo.split(X, y, groups)):
    ...     print(f"Fold {i}:")
    ...     print(f"  Train: index={train_index}, group={groups[train_index]}")
    ...     print(f"  Test:  index={test_index}, group={groups[test_index]}")
    Fold 0:
      Train: index=[2 3], group=[2 2]
      Test:  index=[0 1], group=[1 1]
    Fold 1:
      Train: index=[0 1], group=[1 1]
      Test:  index=[2 3], group=[2 2]

    See also
    --------
    GroupKFold: K-fold iterator variant with non-overlapping groups.
    c              #      K   |t          d          t          |dddd           }t          j        |          }t	          |          dk    rt          d|z            |D ]
}||k    V  d S )Nr   r4   TFr   copyr   rW   rn   zcThe groups parameter contains fewer than 2 unique groups (%s). LeaveOneGroupOut expects at least 2.)ro   r   rN   r   r   )r=   r>   r?   r4   r   r   s         r@   rP   z!LeaveOneGroupOut._iter_test_masksH  s      >IJJJxde4
 
 
 	&))}""=?LM    	 	AA+	 	rA   Nc                     |t          d          t          |ddd          }t          t          j        |                    S )  Returns the number of splitting iterations in the cross-validator.

        Parameters
        ----------
        X : object
            Always ignored, exists for compatibility.

        y : object
            Always ignored, exists for compatibility.

        groups : array-like of shape (n_samples,)
            Group labels for the samples used while splitting the dataset into
            train/test set. This 'groups' parameter must always be specified to
            calculate the number of splits, though the other parameters can be
            omitted.

        Returns
        -------
        n_splits : int
            Returns the number of splitting iterations in the cross-validator.
        Nr   r4   Fr   )ro   r   r   rN   r   r^   s       r@   r`   zLeaveOneGroupOut.get_n_splitsX  sG    , >IJJJVEQUVVV29V$$%%%rA   c                 J    t                                          |||          S r   r   r<   s       r@   r;   zLeaveOneGroupOut.splits  r   rA   rh   rB   )r8   rC   rD   rE   rP   r`   r;   rF   rG   s   @r@   r!   r!     sk        . .`   & & & &6+ + + + + + + + + +rA   r!   c                   8     e Zd ZdZd Zd ZddZd fd	Z xZS )	r#   ac  Leave P Group(s) Out cross-validator.

    Provides train/test indices to split data according to a third-party
    provided group. This group information can be used to encode arbitrary
    domain specific stratifications of the samples as integers.

    For instance the groups could be the year of collection of the samples
    and thus allow for cross-validation against time-based splits.

    The difference between LeavePGroupsOut and LeaveOneGroupOut is that
    the former builds the test sets with all the samples assigned to
    ``p`` different values of the groups while the latter uses samples
    all assigned the same groups.

    Read more in the :ref:`User Guide <leave_p_groups_out>`.

    Parameters
    ----------
    n_groups : int
        Number of groups (``p``) to leave out in the test split.

    Examples
    --------
    >>> import numpy as np
    >>> from sklearn.model_selection import LeavePGroupsOut
    >>> X = np.array([[1, 2], [3, 4], [5, 6]])
    >>> y = np.array([1, 2, 1])
    >>> groups = np.array([1, 2, 3])
    >>> lpgo = LeavePGroupsOut(n_groups=2)
    >>> lpgo.get_n_splits(X, y, groups)
    3
    >>> lpgo.get_n_splits(groups=groups)  # 'groups' is always required
    3
    >>> print(lpgo)
    LeavePGroupsOut(n_groups=2)
    >>> for i, (train_index, test_index) in enumerate(lpgo.split(X, y, groups)):
    ...     print(f"Fold {i}:")
    ...     print(f"  Train: index={train_index}, group={groups[train_index]}")
    ...     print(f"  Test:  index={test_index}, group={groups[test_index]}")
    Fold 0:
      Train: index=[2], group=[3]
      Test:  index=[0 1], group=[1 2]
    Fold 1:
      Train: index=[1], group=[2]
      Test:  index=[0 2], group=[1 3]
    Fold 2:
      Train: index=[0], group=[1]
      Test:  index=[1 2], group=[2 3]

    See Also
    --------
    GroupKFold : K-fold iterator variant with non-overlapping groups.
    c                     || _         d S rb   )r   )r=   r   s     r@   rx   zLeavePGroupsOut.__init__  s     rA   c              #     K   |t          d          t          |dddd           }t          j        |          }| j        t          |          k    r"t          d| j        || j        dz   fz            t          t          t          |                    | j                  }|D ]T}t          j        t          |          t                    }|t          j        |                   D ]}d|||k    <   |V  Ud S )	Nr   r4   TFr,  zThe groups parameter contains fewer than (or equal to) n_groups (%d) numbers of unique groups (%s). LeavePGroupsOut expects that at least n_groups + 1 (%d) unique groups be presentrn   rV   )ro   r   rN   r   r   r   r	   rq   rY   r   rZ   rz   )	r=   r>   r?   r4   r   combirR   rS   ls	            r@   rP   z LeavePGroupsOut._iter_test_masks  s%     >IJJJxde4
 
 
 	&))=C.... "]M4=1;LMN   U3}#5#566FF 	 	G,q//>>>J"28G#4#45 / /*.
6Q;''		 	rA   Nc           	          |t          d          t          |ddd          }t          t          t	          t          j        |                    | j        d                    S )r/  Nr   r4   Fr   Tr}   )ro   r   r   r   r   rN   r   r   r^   s       r@   r`   zLeavePGroupsOut.get_n_splits  s`    , >IJJJVEQUVVV4BIf--..TJJJKKKrA   c                 J    t                                          |||          S r   r   r<   s       r@   r;   zLeavePGroupsOut.split  r   rA   rh   rB   )	r8   rC   rD   rE   rx   rP   r`   r;   rF   rG   s   @r@   r#   r#     s~        4 4l! ! !  *L L L L6+ + + + + + + + + +rA   r#   c                   H    e Zd ZdZdej        iZddddZd
dZddZ	d	 Z
dS )_RepeatedSplitsa  Repeated splits for an arbitrary randomized CV splitter.

    Repeats splits for cross-validators n times with different randomization
    in each repetition.

    Parameters
    ----------
    cv : callable
        Cross-validator class.

    n_repeats : int, default=10
        Number of times cross-validator needs to be repeated.

    random_state : int, RandomState instance or None, default=None
        Passes `random_state` to the arbitrary repeating cross validator.
        Pass an int for reproducible output across multiple function calls.
        See :term:`Glossary <random_state>`.

    **cvargs : additional params
        Constructor parameters for cv. Must not contain random_state
        and shuffle.
    r4   
   N)	n_repeatsr   c                   t          |t          j                  st          d          |dk    rt          d          t	          fddD                       rt          d          || _        || _        || _        | _        d S )Nz/Number of repetitions must be of Integral type.r   z-Number of repetitions must be greater than 0.c              3       K   | ]}|v V  	d S rb   rK   )r   keycvargss     r@   	<genexpr>z+_RepeatedSplits.__init__.<locals>.<genexpr>8  s'      DDsf}DDDDDDrA   r   r   z0cvargs must not contain random_state or shuffle.)	r   r   r   ro   anycvr;  r   r?  )r=   rC  r;  r   r?  s       `r@   rx   z_RepeatedSplits.__init__1  s    )W%566 	PNOOO>>LMMMDDDD(CDDDDD 	QOPPP"(rA   c              #      K   | j         }t          | j                  }t          |          D ]:} | j        d|dd| j        }|                    |||          D ]\  }}	||	fV  ;dS )a  Generates indices to split data into training and test set.

        Parameters
        ----------
        X : array-like of shape (n_samples, n_features)
            Training data, where `n_samples` is the number of samples
            and `n_features` is the number of features.

        y : array-like of shape (n_samples,)
            The target variable for supervised learning problems.

        groups : array-like of shape (n_samples,), default=None
            Group labels for the samples used while splitting the dataset into
            train/test set.

        Yields
        ------
        train : ndarray
            The training set indices for that split.

        test : ndarray
            The testing set indices for that split.
        TrA  NrK   )r;  r   r   rq   rC  r?  r;   )
r=   r>   r?   r4   r;  r   r   rC  rT   rS   s
             r@   r;   z_RepeatedSplits.split@  s      0 N	 !233## 	. 	.CGc4GG4;GGB+-88Aq&+A+A . .'Z!:-----.	. 	.rA   c                     t          | j                  } | j        d|dd| j        }|                    |||          | j        z  S )a  Returns the number of splitting iterations in the cross-validator.

        Parameters
        ----------
        X : object
            Always ignored, exists for compatibility.
            ``np.zeros(n_samples)`` may be used as a placeholder.

        y : object
            Always ignored, exists for compatibility.
            ``np.zeros(n_samples)`` may be used as a placeholder.

        groups : array-like of shape (n_samples,), default=None
            Group labels for the samples used while splitting the dataset into
            train/test set.

        Returns
        -------
        n_splits : int
            Returns the number of splitting iterations in the cross-validator.
        TrA  rK   )r   r   rC  r?  r`   r;  )r=   r>   r?   r4   r   rC  s         r@   r`   z_RepeatedSplits.get_n_splits`  sQ    , !!233TWC#tCCt{CCq!V,,t~==rA   c                      t          |           S rb   rc   re   s    r@   rf   z_RepeatedSplits.__repr__z  rg   rA   rB   rh   )r8   rC   rD   rE   r   ri   (_RepeatedSplits__metadata_request__splitrx   r;   r`   rf   rK   rA   r@   r9  r9    s         6 "*+;+B C(*     . . . .@> > > >4! ! ! ! !rA   r9  c                   ,     e Zd ZdZdddd fd
Z xZS )r&   a  Repeated K-Fold cross validator.

    Repeats K-Fold n times with different randomization in each repetition.

    Read more in the :ref:`User Guide <repeated_k_fold>`.

    Parameters
    ----------
    n_splits : int, default=5
        Number of folds. Must be at least 2.

    n_repeats : int, default=10
        Number of times cross-validator needs to be repeated.

    random_state : int, RandomState instance or None, default=None
        Controls the randomness of each repeated cross-validation instance.
        Pass an int for reproducible output across multiple function calls.
        See :term:`Glossary <random_state>`.

    Examples
    --------
    >>> import numpy as np
    >>> from sklearn.model_selection import RepeatedKFold
    >>> X = np.array([[1, 2], [3, 4], [1, 2], [3, 4]])
    >>> y = np.array([0, 0, 1, 1])
    >>> rkf = RepeatedKFold(n_splits=2, n_repeats=2, random_state=2652124)
    >>> rkf.get_n_splits(X, y)
    4
    >>> print(rkf)
    RepeatedKFold(n_repeats=2, n_splits=2, random_state=2652124)
    >>> for i, (train_index, test_index) in enumerate(rkf.split(X)):
    ...     print(f"Fold {i}:")
    ...     print(f"  Train: index={train_index}")
    ...     print(f"  Test:  index={test_index}")
    ...
    Fold 0:
      Train: index=[0 1]
      Test:  index=[2 3]
    Fold 1:
      Train: index=[2 3]
      Test:  index=[0 1]
    Fold 2:
      Train: index=[1 2]
      Test:  index=[0 3]
    Fold 3:
      Train: index=[0 3]
      Test:  index=[1 2]

    Notes
    -----
    Randomized CV splitters may return different results for each call of
    split. You can make the results identical by setting `random_state`
    to an integer.

    See Also
    --------
    RepeatedStratifiedKFold : Repeats Stratified K-Fold n times.
    r   r:  Nr   r;  r   c                \    t                                          t          |||           d S N)r;  r   r   )r:   rx   r   r=   r   r;  r   r7   s       r@   rx   zRepeatedKFold.__init__  s7    Y\H 	 	
 	
 	
 	
 	
rA   r8   rC   rD   rE   rx   rF   rG   s   @r@   r&   r&   ~  sV        9 9v $% 
 
 
 
 
 
 
 
 
 
 
rA   r&   c                   8     e Zd ZdZdddd fd
Zd fd	Z xZS )	r%   a#  Repeated Stratified K-Fold cross validator.

    Repeats Stratified K-Fold n times with different randomization in each
    repetition.

    Read more in the :ref:`User Guide <repeated_k_fold>`.

    Parameters
    ----------
    n_splits : int, default=5
        Number of folds. Must be at least 2.

    n_repeats : int, default=10
        Number of times cross-validator needs to be repeated.

    random_state : int, RandomState instance or None, default=None
        Controls the generation of the random states for each repetition.
        Pass an int for reproducible output across multiple function calls.
        See :term:`Glossary <random_state>`.

    Examples
    --------
    >>> import numpy as np
    >>> from sklearn.model_selection import RepeatedStratifiedKFold
    >>> X = np.array([[1, 2], [3, 4], [1, 2], [3, 4]])
    >>> y = np.array([0, 0, 1, 1])
    >>> rskf = RepeatedStratifiedKFold(n_splits=2, n_repeats=2,
    ...     random_state=36851234)
    >>> rskf.get_n_splits(X, y)
    4
    >>> print(rskf)
    RepeatedStratifiedKFold(n_repeats=2, n_splits=2, random_state=36851234)
    >>> for i, (train_index, test_index) in enumerate(rskf.split(X, y)):
    ...     print(f"Fold {i}:")
    ...     print(f"  Train: index={train_index}")
    ...     print(f"  Test:  index={test_index}")
    ...
    Fold 0:
      Train: index=[1 2]
      Test:  index=[0 3]
    Fold 1:
      Train: index=[0 3]
      Test:  index=[1 2]
    Fold 2:
      Train: index=[1 3]
      Test:  index=[0 2]
    Fold 3:
      Train: index=[0 2]
      Test:  index=[1 3]

    Notes
    -----
    Randomized CV splitters may return different results for each call of
    split. You can make the results identical by setting `random_state`
    to an integer.

    See Also
    --------
    RepeatedKFold : Repeats K-Fold n times.
    r   r:  NrI  c                \    t                                          t          |||           d S rK  )r:   rx   r)   rL  s       r@   rx   z RepeatedStratifiedKFold.__init__  s:    %	 	 	
 	
 	
 	
 	
rA   c                 r    t          |ddd          }t                                          |||          S )r   r?   FNr   r3   )r   r:   r;   r<   s       r@   r;   zRepeatedStratifiedKFold.split  s8    D cU$GGGww}}Q&}111rA   rb   )r8   rC   rD   rE   rx   r;   rF   rG   s   @r@   r%   r%     st        ; ;z $% 
 
 
 
 
 
 
#2 #2 #2 #2 #2 #2 #2 #2 #2 #2rA   r%   c                   V    e Zd ZdZdej        iZ	 ddddddZddZddZ	dd	Z
d
 ZdS )BaseShuffleSplita[  Base class for *ShuffleSplit.

    Parameters
    ----------
    n_splits : int, default=10
        Number of re-shuffling & splitting iterations.

    test_size : float or int, default=None
        If float, should be between 0.0 and 1.0 and represent the proportion
        of the dataset to include in the test split. If int, represents the
        absolute number of test samples. If None, the value is set to the
        complement of the train size. If ``train_size`` is also None, it will
        be set to 0.1.

    train_size : float or int, default=None
        If float, should be between 0.0 and 1.0 and represent the
        proportion of the dataset to include in the train split. If
        int, represents the absolute number of train samples. If None,
        the value is automatically set to the complement of the test size.

    random_state : int, RandomState instance or None, default=None
        Controls the randomness of the training and testing indices produced.
        Pass an int for reproducible output across multiple function calls.
        See :term:`Glossary <random_state>`.
    r4   r:  Nr  
train_sizer   c                L    || _         || _        || _        || _        d| _        d S )N皙?)r   r  rT  r   _default_test_size)r=   r   r  rT  r   s        r@   rx   zBaseShuffleSplit.__init__M  s/     !"$("%rA   c              #   z   K   t          |||          \  }}}|                     |||          D ]\  }}||fV  dS )a  Generate indices to split data into training and test set.

        Parameters
        ----------
        X : array-like of shape (n_samples, n_features)
            Training data, where `n_samples` is the number of samples
            and `n_features` is the number of features.

        y : array-like of shape (n_samples,)
            The target variable for supervised learning problems.

        groups : array-like of shape (n_samples,), default=None
            Group labels for the samples used while splitting the dataset into
            train/test set.

        Yields
        ------
        train : ndarray
            The training set indices for that split.

        test : ndarray
            The testing set indices for that split.

        Notes
        -----
        Randomized CV splitters may return different results for each call of
        split. You can make the results identical by setting `random_state`
        to an integer.
        N)r   _iter_indicesr=   r>   r?   r4   r   r   s         r@   r;   zBaseShuffleSplit.splitV  sa      < !Av..1f--aF;; 	 	KE4+	 	rA   c              #   ,  K   t          |          }t          || j        | j        | j                  \  }}t          | j                  }t          | j                  D ]4}|	                    |          }	|	d|         }
|	|||z            }||
fV  5dS )zGenerate (train, test) indicesdefault_test_sizeN)
r   _validate_shuffle_splitr  rT  rW  r   r   rq   r   r   )r=   r>   r?   r4   rr   n_trainn_testr   r   r   ind_test	ind_trains               r@   rY  zBaseShuffleSplit._iter_indicesx  s       OO	1NO"5	
 
 
 !!233t}%% 	& 	&A//)44K"7F7+H#Ffw.>$?@IX%%%%%	& 	&rA   c                     | j         S r   r   r^   s       r@   r`   zBaseShuffleSplit.get_n_splits  r   rA   c                      t          |           S rb   rc   re   s    r@   rf   zBaseShuffleSplit.__repr__  rg   rA   r:  rB   rh   )r8   rC   rD   rE   r   ri   *_BaseShuffleSplit__metadata_request__splitrx   r;   rY  r`   rf   rK   rA   r@   rR  rR  ,  s         < "*+;+B C &(,D& & & & &       D& & & &$   *! ! ! ! !rA   rR  c                   0     e Zd ZdZ	 ddddd fdZ xZS )r'   a  Random permutation cross-validator.

    Yields indices to split data into training and test sets.

    Note: contrary to other cross-validation strategies, random splits
    do not guarantee that test sets across all folds will be mutually exclusive,
    and might include overlapping samples. However, this is still very likely for
    sizeable datasets.

    Read more in the :ref:`User Guide <ShuffleSplit>`.

    For visualisation of cross-validation behaviour and
    comparison between common scikit-learn split methods
    refer to :ref:`sphx_glr_auto_examples_model_selection_plot_cv_indices.py`

    Parameters
    ----------
    n_splits : int, default=10
        Number of re-shuffling & splitting iterations.

    test_size : float or int, default=None
        If float, should be between 0.0 and 1.0 and represent the proportion
        of the dataset to include in the test split. If int, represents the
        absolute number of test samples. If None, the value is set to the
        complement of the train size. If ``train_size`` is also None, it will
        be set to 0.1.

    train_size : float or int, default=None
        If float, should be between 0.0 and 1.0 and represent the
        proportion of the dataset to include in the train split. If
        int, represents the absolute number of train samples. If None,
        the value is automatically set to the complement of the test size.

    random_state : int, RandomState instance or None, default=None
        Controls the randomness of the training and testing indices produced.
        Pass an int for reproducible output across multiple function calls.
        See :term:`Glossary <random_state>`.

    Examples
    --------
    >>> import numpy as np
    >>> from sklearn.model_selection import ShuffleSplit
    >>> X = np.array([[1, 2], [3, 4], [5, 6], [7, 8], [3, 4], [5, 6]])
    >>> y = np.array([1, 2, 1, 2, 1, 2])
    >>> rs = ShuffleSplit(n_splits=5, test_size=.25, random_state=0)
    >>> rs.get_n_splits(X)
    5
    >>> print(rs)
    ShuffleSplit(n_splits=5, random_state=0, test_size=0.25, train_size=None)
    >>> for i, (train_index, test_index) in enumerate(rs.split(X)):
    ...     print(f"Fold {i}:")
    ...     print(f"  Train: index={train_index}")
    ...     print(f"  Test:  index={test_index}")
    Fold 0:
      Train: index=[1 3 0 4]
      Test:  index=[5 2]
    Fold 1:
      Train: index=[4 0 2 5]
      Test:  index=[1 3]
    Fold 2:
      Train: index=[1 2 4 0]
      Test:  index=[3 5]
    Fold 3:
      Train: index=[3 4 1 0]
      Test:  index=[5 2]
    Fold 4:
      Train: index=[3 5 1 0]
      Test:  index=[2 4]
    >>> # Specify train and test size
    >>> rs = ShuffleSplit(n_splits=5, train_size=0.5, test_size=.25,
    ...                   random_state=0)
    >>> for i, (train_index, test_index) in enumerate(rs.split(X)):
    ...     print(f"Fold {i}:")
    ...     print(f"  Train: index={train_index}")
    ...     print(f"  Test:  index={test_index}")
    Fold 0:
      Train: index=[1 3 0]
      Test:  index=[5 2]
    Fold 1:
      Train: index=[4 0 2]
      Test:  index=[1 3]
    Fold 2:
      Train: index=[1 2 4]
      Test:  index=[3 5]
    Fold 3:
      Train: index=[3 4 1]
      Test:  index=[5 2]
    Fold 4:
      Train: index=[3 5 1]
      Test:  index=[2 4]
    r:  NrS  c                `    t                                          ||||           d| _        d S Nr   r  rT  r   rV  r:   rx   rW  r=   r   r  rT  r   r7   s        r@   rx   zShuffleSplit.__init__   B     	!%	 	 	
 	
 	
 #&rA   re  rM  rG   s   @r@   r'   r'     s`        Z Zz 	&(,D	& 	& 	& 	& 	& 	& 	& 	& 	& 	& 	&rA   r'   c                   F     e Zd ZdZ	 ddddd fdZ fdZd	 fd	Z xZS )
r(   a  Shuffle-Group(s)-Out cross-validation iterator.

    Provides randomized train/test indices to split data according to a
    third-party provided group. This group information can be used to encode
    arbitrary domain specific stratifications of the samples as integers.

    For instance the groups could be the year of collection of the samples
    and thus allow for cross-validation against time-based splits.

    The difference between :class:`LeavePGroupsOut` and ``GroupShuffleSplit`` is that
    the former generates splits using all subsets of size ``p`` unique groups,
    whereas ``GroupShuffleSplit`` generates a user-determined number of random
    test splits, each with a user-determined fraction of unique groups.

    For example, a less computationally intensive alternative to
    ``LeavePGroupsOut(p=10)`` would be
    ``GroupShuffleSplit(test_size=10, n_splits=100)``.

    Contrary to other cross-validation strategies, the random splits
    do not guarantee that test sets across all folds will be mutually exclusive,
    and might include overlapping samples. However, this is still very likely for
    sizeable datasets.

    Note: The parameters ``test_size`` and ``train_size`` refer to groups, and
    not to samples as in :class:`ShuffleSplit`.

    Read more in the :ref:`User Guide <group_shuffle_split>`.

    For visualisation of cross-validation behaviour and
    comparison between common scikit-learn split methods
    refer to :ref:`sphx_glr_auto_examples_model_selection_plot_cv_indices.py`

    Parameters
    ----------
    n_splits : int, default=5
        Number of re-shuffling & splitting iterations.

    test_size : float, int, default=None
        If float, should be between 0.0 and 1.0 and represent the proportion
        of groups to include in the test split (rounded up). If int,
        represents the absolute number of test groups. If None, the value is
        set to the complement of the train size. If ``train_size`` is also None,
        it will be set to 0.2.

    train_size : float or int, default=None
        If float, should be between 0.0 and 1.0 and represent the
        proportion of the groups to include in the train split. If
        int, represents the absolute number of train groups. If None,
        the value is automatically set to the complement of the test size.

    random_state : int, RandomState instance or None, default=None
        Controls the randomness of the training and testing indices produced.
        Pass an int for reproducible output across multiple function calls.
        See :term:`Glossary <random_state>`.

    Examples
    --------
    >>> import numpy as np
    >>> from sklearn.model_selection import GroupShuffleSplit
    >>> X = np.ones(shape=(8, 2))
    >>> y = np.ones(shape=(8, 1))
    >>> groups = np.array([1, 1, 2, 2, 2, 3, 3, 3])
    >>> print(groups.shape)
    (8,)
    >>> gss = GroupShuffleSplit(n_splits=2, train_size=.7, random_state=42)
    >>> gss.get_n_splits()
    2
    >>> print(gss)
    GroupShuffleSplit(n_splits=2, random_state=42, test_size=None, train_size=0.7)
    >>> for i, (train_index, test_index) in enumerate(gss.split(X, y, groups)):
    ...     print(f"Fold {i}:")
    ...     print(f"  Train: index={train_index}, group={groups[train_index]}")
    ...     print(f"  Test:  index={test_index}, group={groups[test_index]}")
    Fold 0:
      Train: index=[2 3 4 5 6 7], group=[2 2 2 3 3 3]
      Test:  index=[0 1], group=[1 1]
    Fold 1:
      Train: index=[0 1 5 6 7], group=[1 1 3 3 3]
      Test:  index=[2 3 4], group=[2 2 2]

    See Also
    --------
    ShuffleSplit : Shuffles samples to create independent test/train sets.

    LeavePGroupsOut : Train set leaves out all possible subsets of `p` groups.
    r   NrS  c                `    t                                          ||||           d| _        d S )Nrj  g?rk  rl  s        r@   rx   zGroupShuffleSplit.__init__d  rm  rA   c              #   ~  K   |t          d          t          |ddd           }t          j        |d          \  }}t	                                          |          D ]Y\  }}t          j        t          j        ||                    }t          j        t          j        ||                    }	||	fV  Zd S )Nr   r4   Fr   Tr   )r>   )ro   r   rN   r   r:   rY  flatnonzeror   )r=   r>   r?   r4   classesgroup_indicesgroup_train
group_testr   r   r7   s             r@   rY  zGroupShuffleSplit._iter_indiceso  s      >IJJJVEQUVVV!#6$!G!G!G',ww'<'<w'<'G'G 	 	#K N27=+#F#FGGE>"'-"D"DEED+	 	rA   c                 J    t                                          |||          S )a  Generate indices to split data into training and test set.

        Parameters
        ----------
        X : array-like of shape (n_samples, n_features)
            Training data, where `n_samples` is the number of samples
            and `n_features` is the number of features.

        y : array-like of shape (n_samples,), default=None
            The target variable for supervised learning problems.

        groups : array-like of shape (n_samples,)
            Group labels for the samples used while splitting the dataset into
            train/test set.

        Yields
        ------
        train : ndarray
            The training set indices for that split.

        test : ndarray
            The testing set indices for that split.

        Notes
        -----
        Randomized CV splitters may return different results for each call of
        split. You can make the results identical by setting `random_state`
        to an integer.
        r   r<   s       r@   r;   zGroupShuffleSplit.split}  s    < ww}}Q6***rA   r   rB   r8   rC   rD   rE   rx   rY  r;   rF   rG   s   @r@   r(   r(     s        U Up 	&'+4	& 	& 	& 	& 	& 	& 	&    + + + + + + + + + +rA   r(   c                   D     e Zd ZdZ	 ddddd fdZd	dZd	 fd	Z xZS )
r+   aZ  Stratified ShuffleSplit cross-validator.

    Provides train/test indices to split data in train/test sets.

    This cross-validation object is a merge of :class:`StratifiedKFold` and
    :class:`ShuffleSplit`, which returns stratified randomized folds. The folds
    are made by preserving the percentage of samples for each class.

    Note: like the :class:`ShuffleSplit` strategy, stratified random splits
    do not guarantee that test sets across all folds will be mutually exclusive,
    and might include overlapping samples. However, this is still very likely for
    sizeable datasets.

    Read more in the :ref:`User Guide <stratified_shuffle_split>`.

    For visualisation of cross-validation behaviour and
    comparison between common scikit-learn split methods
    refer to :ref:`sphx_glr_auto_examples_model_selection_plot_cv_indices.py`

    Parameters
    ----------
    n_splits : int, default=10
        Number of re-shuffling & splitting iterations.

    test_size : float or int, default=None
        If float, should be between 0.0 and 1.0 and represent the proportion
        of the dataset to include in the test split. If int, represents the
        absolute number of test samples. If None, the value is set to the
        complement of the train size. If ``train_size`` is also None, it will
        be set to 0.1.

    train_size : float or int, default=None
        If float, should be between 0.0 and 1.0 and represent the
        proportion of the dataset to include in the train split. If
        int, represents the absolute number of train samples. If None,
        the value is automatically set to the complement of the test size.

    random_state : int, RandomState instance or None, default=None
        Controls the randomness of the training and testing indices produced.
        Pass an int for reproducible output across multiple function calls.
        See :term:`Glossary <random_state>`.

    Examples
    --------
    >>> import numpy as np
    >>> from sklearn.model_selection import StratifiedShuffleSplit
    >>> X = np.array([[1, 2], [3, 4], [1, 2], [3, 4], [1, 2], [3, 4]])
    >>> y = np.array([0, 0, 0, 1, 1, 1])
    >>> sss = StratifiedShuffleSplit(n_splits=5, test_size=0.5, random_state=0)
    >>> sss.get_n_splits(X, y)
    5
    >>> print(sss)
    StratifiedShuffleSplit(n_splits=5, random_state=0, ...)
    >>> for i, (train_index, test_index) in enumerate(sss.split(X, y)):
    ...     print(f"Fold {i}:")
    ...     print(f"  Train: index={train_index}")
    ...     print(f"  Test:  index={test_index}")
    Fold 0:
      Train: index=[5 2 3]
      Test:  index=[4 1 0]
    Fold 1:
      Train: index=[5 1 4]
      Test:  index=[0 2 3]
    Fold 2:
      Train: index=[5 0 2]
      Test:  index=[4 3 1]
    Fold 3:
      Train: index=[4 1 0]
      Test:  index=[2 3 5]
    Fold 4:
      Train: index=[0 5 1]
      Test:  index=[3 4 2]
    r:  NrS  c                `    t                                          ||||           d| _        d S ri  rk  rl  s        r@   rx   zStratifiedShuffleSplit.__init__  rm  rA   c           	   #   *  K   t          |          }t          |ddd           }t          || j        | j        | j                  \  }}t          |          \  }}t          ||          }|j        dk    rt          j
        d |D                       }t          j        |d	          \  }	}
|	j        d
         }t          j        |
          }t          j        |          dk     rt          d          ||k     rt          d||fz            ||k     rt          d||fz            t          j        t          j        |
d          t          j        |          d d                   }t'          | j                  }t+          | j                  D ]}t/          |||          }||z
  }t/          |||          }g }g }t+          |          D ]}|                    ||                   }||                             |d          }|                    |d ||                             |                    |||         ||         ||         z                       |                    |          }|                    |          }||fV  d S )Nr?   Fr   r\  )r   r   c                 ^    g | ]*}d                      |                    d                    +S ) str)joinastype)r   rows     r@   r   z8StratifiedShuffleSplit._iter_indices.<locals>.<listcomp>	  s0    CCC##((3::e#4#455CCCrA   Tr   r   zThe least populated class in y has only 1 member, which is too few. The minimum number of groups for any class cannot be less than 2.zLThe train_size = %d should be greater or equal to the number of classes = %dzKThe test_size = %d should be greater or equal to the number of classes = %dr   r   r   clip)mode)r   r   r^  r  rT  rW  r   r   ndimrN   rz   r   shaper   r   ro   r;   r   cumsumr   r   rq   r   r   r   takeextend)r=   r>   r?   r4   rr   r_  r`  r   r   rr  	y_indicesr   class_countsclass_indicesr   n_iclass_counts_remainingt_ir   r   r   r   perm_indices_class_is                          r@   rY  z$StratifiedShuffleSplit._iter_indices  s      OO	cU$GGG1NO"5	
 
 
 a  AaB'''6Q;; CCCCCDDAYq>>>M!$	{9--6,!###   Y69@)8LM   I69?8KL   Jy{333RY|5L5LSbS5Q
 
 !!233t}%% 	 	A $L'3??C%1C%7"#$:FCHHCED9%% L L!ool1o>>'4Q'7'<'<[v'<'V'V$1(CF(;<<<0Q#a&3q6/1IJKKKKOOE**E??4((D+)	 	rA   c                     |'t          j        d| j        j         t                     t          |ddd          }t                                          |||          S )a  Generate indices to split data into training and test set.

        Parameters
        ----------
        X : array-like of shape (n_samples, n_features)
            Training data, where `n_samples` is the number of samples
            and `n_features` is the number of features.

            Note that providing ``y`` is sufficient to generate the splits and
            hence ``np.zeros(n_samples)`` may be used as a placeholder for
            ``X`` instead of actual training data.

        y : array-like of shape (n_samples,) or (n_samples, n_labels)
            The target variable for supervised learning problems.
            Stratification is done based on the y labels.

        groups : object
            Always ignored, exists for compatibility.

        Yields
        ------
        train : ndarray
            The training set indices for that split.

        test : ndarray
            The testing set indices for that split.

        Notes
        -----
        Randomized CV splitters may return different results for each call of
        split. You can make the results identical by setting `random_state`
        to an integer.
        Nr2   r?   Fr   r   r<   s       r@   r;   zStratifiedShuffleSplit.split>	  r   rA   re  rb   rw  rG   s   @r@   r+   r+     s        H HV 	&(,D	& 	& 	& 	& 	& 	& 	&H H H HT(+ (+ (+ (+ (+ (+ (+ (+ (+ (+rA   r+   c                 d   |||}t          j        |          j        j        }t          j        |          j        j        }|dk    r|| k    s|dk    s|dk    r/|dk    s|dk    r#t	          d                    ||                     |dk    r|| k    s|dk    s|dk    r/|dk    s|dk    r#t	          d                    ||                     |&|dvr"t	          d	                    |                    |&|dvr"t	          d
                    |                    |dk    r4|dk    r.||z   dk    r%t	          d                    ||z                       |dk    rt          || z            }n|dk    rt          |          }|dk    rt          || z            }n|dk    rt          |          }|| |z
  }n|| |z
  }||z   | k    rt	          d||z   | fz            t          |          t          |          }}|dk    r$t	          d                    | ||                    ||fS )zx
    Validation helper to check if the train/test sizes are meaningful w.r.t. the
    size of the data (n_samples).
    Nr   r   r   rn   zqtest_size={0} should be either positive and smaller than the number of samples {1} or a float in the (0, 1) rangezrtrain_size={0} should be either positive and smaller than the number of samples {1} or a float in the (0, 1) range)r   r   z Invalid value for train_size: {}zInvalid value for test_size: {}zlThe sum of test_size and train_size = {}, should be in the (0, 1) range. Reduce test_size and/or train_size.z~The sum of train_size and test_size = %d, should be smaller than the number of samples %d. Reduce test_size and/or train_size.zWith n_samples={}, test_size={} and train_size={}, the resulting train set will be empty. Adjust any of the aforementioned parameters.)
rN   r   rW   r   ro   rp   r
   floatr   r   )rr   r  rT  r]  test_size_typetrain_size_typer`  r_  s           r@   r^  r^  i	  s   
 Z/%	Z	**05Nj,,27O 	#)##yA~~S  !^^yA~~!6)Y77
 
 	
 	39$$
ac!!1__
a!6*i88
 
 	
 /"C"C;BB:NNOOOz!A!A:AA)LLMMM#.C"7"7J<RUV<V<V::@&iAW:X:X
 
 	

 i)+,,	3		y!!#
Y.//	C		
##f$		W$)## %v-y9:
 
 	
 'llCKKVG!||))/	9j)Q)Q
 
 	
 F?rA   c                   4    e Zd ZdZd ZddZd Zd ZddZdS )	r,   a=  Predefined split cross-validator.

    Provides train/test indices to split data into train/test sets using a
    predefined scheme specified by the user with the ``test_fold`` parameter.

    Read more in the :ref:`User Guide <predefined_split>`.

    .. versionadded:: 0.16

    Parameters
    ----------
    test_fold : array-like of shape (n_samples,)
        The entry ``test_fold[i]`` represents the index of the test set that
        sample ``i`` belongs to. It is possible to exclude sample ``i`` from
        any test set (i.e. include sample ``i`` in every training set) by
        setting ``test_fold[i]`` equal to -1.

    Examples
    --------
    >>> import numpy as np
    >>> from sklearn.model_selection import PredefinedSplit
    >>> X = np.array([[1, 2], [3, 4], [1, 2], [3, 4]])
    >>> y = np.array([0, 0, 1, 1])
    >>> test_fold = [0, 1, -1, 1]
    >>> ps = PredefinedSplit(test_fold)
    >>> ps.get_n_splits()
    2
    >>> print(ps)
    PredefinedSplit(test_fold=array([ 0,  1, -1,  1]))
    >>> for i, (train_index, test_index) in enumerate(ps.split()):
    ...     print(f"Fold {i}:")
    ...     print(f"  Train: index={train_index}")
    ...     print(f"  Test:  index={test_index}")
    Fold 0:
      Train: index=[1 2 3]
      Test:  index=[0]
    Fold 1:
      Train: index=[0 2]
      Test:  index=[1 3]
    c                     t          j        |t                    | _        t	          | j                  | _        t          j        | j                  | _        | j        | j        dk             | _        d S )NrV   r   )rN   rz   r   	test_foldr   r   unique_folds)r=   r  s     r@   rx   zPredefinedSplit.__init__	  s[    )3777%dn55Idn55 -d.?2.EFrA   Nc                 |    |'t          j        d| j        j         t                     |                                 S )  Generate indices to split data into training and test set.

        Parameters
        ----------
        X : object
            Always ignored, exists for compatibility.

        y : object
            Always ignored, exists for compatibility.

        groups : object
            Always ignored, exists for compatibility.

        Yields
        ------
        train : ndarray
            The training set indices for that split.

        test : ndarray
            The testing set indices for that split.
        Nr2   r"  r^   s       r@   r;   zPredefinedSplit.split	  sA    , MOdn6MOO   {{}}rA   c              #      K   t          j        t          | j                            }|                                 D ]*}|t          j        |                   }||         }||fV  +dS )zGenerate indices to split data into training and test set.

        Yields
        ------
        train : ndarray
            The training set indices for that split.

        test : ndarray
            The testing set indices for that split.
        N)rN   rO   r   r  rP   rQ   )r=   indrS   rT   s       r@   r#  zPredefinedSplit._split
  sw       iDN++,,//11 	* 	*JbnZ889KZJz)))))	* 	*rA   c              #      K   | j         D ][}t          j        | j        |k              d         }t          j        t          | j                  t                    }d||<   |V  \dS )z3Generates boolean masks corresponding to test sets.r   rV   TN)r  rN   r   r  rY   r   rZ   )r=   r   rS   r[   s       r@   rP   z PredefinedSplit._iter_test_masks
  sp      " 	 	A$.A"566q9JT^!4!4DAAAI$(Ij!OOOO		 	rA   c                 *    t          | j                  S r   )r   r  r^   s       r@   r`   zPredefinedSplit.get_n_splits 
  s    & 4$%%%rA   rh   )	r8   rC   rD   rE   rx   r;   r#  rP   r`   rK   rA   r@   r,   r,   	  sw        ' 'RG G G   :* * *"  & & & & & &rA   r,   c                   (    e Zd ZdZd ZddZddZdS )_CVIterableWrapperz5Wrapper class for old style cv objects and iterables.c                 .    t          |          | _        d S rb   )listrC  )r=   rC  s     r@   rx   z_CVIterableWrapper.__init__9
  s    r((rA   Nc                 *    t          | j                  S r   )r   rC  r^   s       r@   r`   z_CVIterableWrapper.get_n_splits<
  s    & 47||rA   c              #   0   K   | j         D ]\  }}||fV  dS )r  N)rC  rZ  s         r@   r;   z_CVIterableWrapper.splitQ
  s:      ,  7 	 	KE4+	 	rA   rh   )r8   rC   rD   rE   rx   r`   r;   rK   rA   r@   r  r  6
  sQ        ??     *     rA   r  r   F)
classifierc                   | dn| } t          | t          j                  r5|r$|"t          |d          dv rt	          |           S t          |           S t          | d          rt          | t                    rKt          | t                    rt          | t                    rt          d| z            t          |           S | S )a  Input checker utility for building a cross-validator.

    Parameters
    ----------
    cv : int, cross-validation generator, iterable or None, default=5
        Determines the cross-validation splitting strategy.
        Possible inputs for cv are:
        - None, to use the default 5-fold cross validation,
        - integer, to specify the number of folds.
        - :term:`CV splitter`,
        - An iterable that generates (train, test) splits as arrays of indices.

        For integer/None inputs, if classifier is True and ``y`` is either
        binary or multiclass, :class:`StratifiedKFold` is used. In all other
        cases, :class:`KFold` is used.

        Refer :ref:`User Guide <cross_validation>` for the various
        cross-validation strategies that can be used here.

        .. versionchanged:: 0.22
            ``cv`` default value changed from 3-fold to 5-fold.

    y : array-like, default=None
        The target variable for supervised learning problems.

    classifier : bool, default=False
        Whether the task is a classification task, in which case
        stratified KFold will be used.

    Returns
    -------
    checked_cv : a cross-validator instance.
        The return value is a cross-validator which generates the train/test
        splits via the ``split`` method.

    Examples
    --------
    >>> from sklearn.model_selection import check_cv
    >>> check_cv(cv=5, y=None, classifier=False)
    KFold(...)
    >>> check_cv(cv=5, y=[1, 1, 0, 0, 0, 0], classifier=True)
    StratifiedKFold(...)
    Nr   r?   )r   r   r;   ziExpected cv as an integer, cross-validation object (from sklearn.model_selection) or an iterable. Got %s.)r   r   r   r   r)   r   hasattrr}  r   ro   r  )rC  r?   r  s      r@   r.   r.   k
  s    X jbB"g&'' 	c2226NNN"2&&&992w &:b##6#6 &"h'' 	:b#+>+> 	*,./  
 ""%%%IrA   rn   neither)closedleftr   booleanz
array-like)r  rT  r   r   stratifyT)prefer_skip_nested_validationc                 b   t          |          }|dk    rt          d          t          | }t          |d                   }t	          || |d          \  }}	|du r>|t          d          t          j        |          t          j        |||	z             nL|t          }
nt          }
 |
|	||          }t          |
                    |d         |	                    \  t          |d                   \  t          t          j        fd
|D                                 S )a  Split arrays or matrices into random train and test subsets.

    Quick utility that wraps input validation,
    ``next(ShuffleSplit().split(X, y))``, and application to input data
    into a single call for splitting (and optionally subsampling) data into a
    one-liner.

    Read more in the :ref:`User Guide <cross_validation>`.

    Parameters
    ----------
    *arrays : sequence of indexables with same length / shape[0]
        Allowed inputs are lists, numpy arrays, scipy-sparse
        matrices or pandas dataframes.

    test_size : float or int, default=None
        If float, should be between 0.0 and 1.0 and represent the proportion
        of the dataset to include in the test split. If int, represents the
        absolute number of test samples. If None, the value is set to the
        complement of the train size. If ``train_size`` is also None, it will
        be set to 0.25.

    train_size : float or int, default=None
        If float, should be between 0.0 and 1.0 and represent the
        proportion of the dataset to include in the train split. If
        int, represents the absolute number of train samples. If None,
        the value is automatically set to the complement of the test size.

    random_state : int, RandomState instance or None, default=None
        Controls the shuffling applied to the data before applying the split.
        Pass an int for reproducible output across multiple function calls.
        See :term:`Glossary <random_state>`.

    shuffle : bool, default=True
        Whether or not to shuffle the data before splitting. If shuffle=False
        then stratify must be None.

    stratify : array-like, default=None
        If not None, data is split in a stratified fashion, using this as
        the class labels.
        Read more in the :ref:`User Guide <stratification>`.

    Returns
    -------
    splitting : list, length=2 * len(arrays)
        List containing train-test split of inputs.

        .. versionadded:: 0.16
            If the input is sparse, the output will be a
            ``scipy.sparse.csr_matrix``. Else, output type is the same as the
            input type.

    Examples
    --------
    >>> import numpy as np
    >>> from sklearn.model_selection import train_test_split
    >>> X, y = np.arange(10).reshape((5, 2)), range(5)
    >>> X
    array([[0, 1],
           [2, 3],
           [4, 5],
           [6, 7],
           [8, 9]])
    >>> list(y)
    [0, 1, 2, 3, 4]

    >>> X_train, X_test, y_train, y_test = train_test_split(
    ...     X, y, test_size=0.33, random_state=42)
    ...
    >>> X_train
    array([[4, 5],
           [0, 1],
           [6, 7]])
    >>> y_train
    [2, 0, 3]
    >>> X_test
    array([[2, 3],
           [8, 9]])
    >>> y_test
    [1, 4]

    >>> train_test_split(y, shuffle=False)
    [[0, 1, 2], [3, 4]]
    r   z$At least one array required as inputg      ?r\  FNz@Stratified train/test split is not implemented for shuffle=FalserS  )r>   r?   c              3   X   K   | ]$}t          |          t          |          fV  %d S rb   )r   )r   ar   r   s     r@   r@  z#train_test_split.<locals>.<genexpr>=  sM       
 
DE^Au%%~a'>'>?
 
 
 
 
 
rA   )r   ro   r   r   r^  rN   rO   r+   r'   nextr;   r   r  r   from_iterable)r  rT  r   r   r  arraysn_arraysrr   r_  r`  CVClassrC  r   r   s               @@r@   r-   r-   
  sl   \ 6{{H1}}?@@@FVAY''I-9jD  OGV %R   	'""y'F"233 ,GG"GWv'UUU288fQi88<<==t0E4HHKE4 
 
 
 
 
IO
 
 
 	
 	
  rA   __test__c                    t          j                    }t          j        ddd           t                      }|}dd|dz  z   dz  z   }t	          t          |                                                     D ]\  }\  }}	t          |	t                    r|dt          |	          }
n|d ||	          }
t          |
          d	k    r|
d
d         dz   |
dd
         z   }
|dk    rY|t          |
          z   dk    sd|
v r%|                    |           t          |          }n|                    d           |dz  }|                    |
           |t          |
          z  }t          j        di | d                    |          }d                    d |                    d          D                       }|S )af  Pretty print the dictionary 'params'

    Parameters
    ----------
    params : dict
        The dictionary to pretty print

    offset : int, default=0
        The offset in characters to add at the begin of each line.

    printer : callable, default=repr
        The function to convert entries to strings, typically
        the builtin str or repr

    r   @   r   )	precision	threshold	edgeitemsz,
rn   r|  =i  Ni,  z...ir   K   
z,  c              3   @   K   | ]}|                     d           V  dS )r|  N)rstrip)r   r5  s     r@   r@  z_pprint.<locals>.<genexpr>w  s,      ??ahhsmm??????rA   rK   )rN   get_printoptionsset_printoptionsr  r   sorteditemsr   r  r}  r   appendr~  r;   )paramsoffsetprinteroptionsparams_listthis_line_lengthline_sepr   r   v	this_reprliness               r@   _pprintr  I  s   " !##G!rQ????&&KFaK3..Hvfllnn5566 + +	6Aqa 	2 $%11c!fff-II $%11ggajjj1Iy>>C!$3$%/)DEE2BBIq55#i..0B66$):K:K""8,,,#&x==  ""4((( A% 9%%%C	NN*""'"""GGK  EII??U[[->->?????ELrA   c                    | j         }t          |j        d|j                  }t          |          }|t          j        u rg }n0t          d |j                                        D                       }| j         j        }t                      }|D ]}t          j        dt                     	 t          j        d          5 }t          | |d           }	|	+t          | d          r| j                            |d           }	d d d            n# 1 swxY w Y   t#          |          r5|d         j        t          u r!	 t          j                            d           t          j                            d           n$# t          j                            d           w xY w|	||<   |dt+          |t#          |          	          d
S )Ndeprecated_originalc                 R    g | ]$}|j         d k    |j        |j        k    |j         %S re   )namer   VAR_KEYWORD)r   rw   s     r@   r   z_build_repr.<locals>.<listcomp>  s?       6V##!-(?(? (?(?(?rA   alwaysT)recordr?  r   ()r  ))r7   getattrrx   r   objectr  
parametersvaluesr8   dictr5   simplefilterFutureWarningcatch_warningsr  r?  getr   categoryfilterspopr  )
r=   clsinitinit_signatureargs
class_namer  r>  wvalues
             r@   rd   rd   {  s   
.C3<!6EEDt__Nv '299;;  
 
 (JVVF  
 	h666		$(555 7c400=WT8%<%<= KOOC66E7 7 7 7 7 7 7 7 7 7 7 7 7 7 7 1vv !A$-=88  ####H  ####H  ####s!zz76#j//#J#J#J#J#JKKs0   -E<?DE<D	E<D	&E<<!Fc                     t          | dd          }t          | dd          }t          |t          j                  p| S )Nr   Tr   r   )r  r   r   r   )rC  r   r   s      r@   _yields_constant_splitsr    s@     b)T**G2~q11LlG$455DWDrA   rb   )r   N)MrE   r   r5   abcr   r   collectionsr   collections.abcr   inspectr   	itertoolsr   r	   mathr
   r   numpyrN   scipy.specialr   utilsr   r   r   r   utils._array_apir   r   r   utils._param_validationr   r   r   utils.extmathr   utils.metadata_routingr   utils.multiclassr   utils.validationr   r   r   __all__r0   rI   r   r"   r$   r   r   r    r)   r*   r  r!   r#   r9  r&   r%   rR  r'   r(   r+   r^  r,   r  r.   r   r-   setattrreprr  rd   r  rK   rA   r@   <module>r     s      ' ' ' ' ' ' ' ' # # # # # # $ $ $ $ $ $       ) ) ) ) ) ) ) )                                      
 L K K K K K K K K K - - - - - - 7 7 7 7 7 7 - - - - - - F F F F F F F F F F  *2 2 2 2 2 2 2 2D1 1 1 1 1, 1 1 1@! @! @! @! @!+w @! @! @! @!FL L L L L*,> L L L^Y> Y> Y> Y> Y>(*< Y> Y> Y>x[ [ [ [ [#w [ [ [ [|] ] ] ] ]$j ] ] ]@W+ W+ W+ W+ W+$j W+ W+ W+tD+ D+ D+ D+ D+j D+ D+ D+NK K K K K.
 K K K\Q Q Q Q Qj Q Q Qht+ t+ t+ t+ t+*,> t+ t+ t+nB+ B+ B+ B+ B+)+= B+ B+ B+Jh! h! h! h! h!(G h! h! h! h!V?
 ?
 ?
 ?
 ?
,o ?
 ?
 ?
Di2 i2 i2 i2 i26 i2 i2 i2Xt! t! t! t! t!)W t! t! t! t!nf& f& f& f& f&+-= f& f& f&RO+ O+ O+ O+ O++-= O+ O+ O+dH+ H+ H+ H+ H+- H+ H+ H+VN N N Nby& y& y& y& y&( y& y& y&x2 2 2 2 2+ 2 2 2j@ @ @ @ @ @F  HZAi888HW%q$v>>>
 HZAi888HW%q$v>>>

 ((;!4(  #'!  ( @ @ @ @% $@L *e , , , d / / / /d%L %L %LPE E E E ErA   