
    0Pht_                         d Z ddlZddlmZmZ ddlZddlmZ	 ddl
mZmZmZmZmZ ddlmZ ddlmZmZ ddlmZ dd	lmZ dd
lmZ ddlmZmZmZmZm Z m!Z!  G d deee          Z" G d deee          Z#dS )z6Dummy estimators that implement simple rules of thumb.    N)IntegralReal   )BaseEstimatorClassifierMixinMultiOutputMixinRegressorMixin_fit_context)check_random_state)Interval
StrOptions)class_distribution)_random_choice_csc_weighted_percentile)_check_sample_weight_num_samplescheck_arraycheck_consistent_lengthcheck_is_fittedvalidate_datac                        e Zd ZU dZ eh d          gdgeeddgdZee	d<   ddddd	Z
 ed
          dd            Zd Zd Zd Z fdZd fd	Z xZS )DummyClassifiera]  DummyClassifier makes predictions that ignore the input features.

    This classifier serves as a simple baseline to compare against other more
    complex classifiers.

    The specific behavior of the baseline is selected with the `strategy`
    parameter.

    All strategies make predictions that ignore the input feature values passed
    as the `X` argument to `fit` and `predict`. The predictions, however,
    typically depend on values observed in the `y` parameter passed to `fit`.

    Note that the "stratified" and "uniform" strategies lead to
    non-deterministic predictions that can be rendered deterministic by setting
    the `random_state` parameter if needed. The other strategies are naturally
    deterministic and, once fit, always return the same constant prediction
    for any value of `X`.

    Read more in the :ref:`User Guide <dummy_estimators>`.

    .. versionadded:: 0.13

    Parameters
    ----------
    strategy : {"most_frequent", "prior", "stratified", "uniform",             "constant"}, default="prior"
        Strategy to use to generate predictions.

        * "most_frequent": the `predict` method always returns the most
          frequent class label in the observed `y` argument passed to `fit`.
          The `predict_proba` method returns the matching one-hot encoded
          vector.
        * "prior": the `predict` method always returns the most frequent
          class label in the observed `y` argument passed to `fit` (like
          "most_frequent"). ``predict_proba`` always returns the empirical
          class distribution of `y` also known as the empirical class prior
          distribution.
        * "stratified": the `predict_proba` method randomly samples one-hot
          vectors from a multinomial distribution parametrized by the empirical
          class prior probabilities.
          The `predict` method returns the class label which got probability
          one in the one-hot vector of `predict_proba`.
          Each sampled row of both methods is therefore independent and
          identically distributed.
        * "uniform": generates predictions uniformly at random from the list
          of unique classes observed in `y`, i.e. each class has equal
          probability.
        * "constant": always predicts a constant label that is provided by
          the user. This is useful for metrics that evaluate a non-majority
          class.

          .. versionchanged:: 0.24
             The default value of `strategy` has changed to "prior" in version
             0.24.

    random_state : int, RandomState instance or None, default=None
        Controls the randomness to generate the predictions when
        ``strategy='stratified'`` or ``strategy='uniform'``.
        Pass an int for reproducible output across multiple function calls.
        See :term:`Glossary <random_state>`.

    constant : int or str or array-like of shape (n_outputs,), default=None
        The explicit constant as predicted by the "constant" strategy. This
        parameter is useful only for the "constant" strategy.

    Attributes
    ----------
    classes_ : ndarray of shape (n_classes,) or list of such arrays
        Unique class labels observed in `y`. For multi-output classification
        problems, this attribute is a list of arrays as each output has an
        independent set of possible classes.

    n_classes_ : int or list of int
        Number of label for each output.

    class_prior_ : ndarray of shape (n_classes,) or list of such arrays
        Frequency of each class observed in `y`. For multioutput classification
        problems, this is computed independently for each output.

    n_features_in_ : int
        Number of features seen during :term:`fit`.

    feature_names_in_ : ndarray of shape (`n_features_in_`,)
        Names of features seen during :term:`fit`. Defined only when `X` has
        feature names that are all strings.

    n_outputs_ : int
        Number of outputs.

    sparse_output_ : bool
        True if the array returned from predict is to be in sparse CSC format.
        Is automatically set to True if the input `y` is passed in sparse
        format.

    See Also
    --------
    DummyRegressor : Regressor that makes predictions using simple rules.

    Examples
    --------
    >>> import numpy as np
    >>> from sklearn.dummy import DummyClassifier
    >>> X = np.array([-1, 1, 1, 1])
    >>> y = np.array([0, 1, 1, 1])
    >>> dummy_clf = DummyClassifier(strategy="most_frequent")
    >>> dummy_clf.fit(X, y)
    DummyClassifier(strategy='most_frequent')
    >>> dummy_clf.predict(X)
    array([1, 1, 1, 1])
    >>> dummy_clf.score(X, y)
    0.75
    >   prioruniformconstant
stratifiedmost_frequentrandom_state
array-likeNstrategyr   r   _parameter_constraintsr   c                0    || _         || _        || _        d S Nr!   )selfr"   r   r   s       M/var/www/html/test/jupyter/venv/lib/python3.11/site-packages/sklearn/dummy.py__init__zDummyClassifier.__init__   s     (     Tprefer_skip_nested_validationc                    t          | |d           | j        | _        | j        dk    rBt          j        |          r.|                                }t          j        dt                     t          j        |          | _	        | j	        s(t          j        |          }t          j        |          }|j        dk    rt          j        |d          }|j        d         | _        t#          ||           |t%          ||          }| j        dk    ro| j        t)          d	          t          j        t          j        | j                  d          j        d
         | j        k    rt)          d| j        z            t+          ||          \  | _        | _        | _        | j        dk    rt3          | j                  D ]pt5          fd| j                 D                       sGd                    | j        | j                                                           }t)          |          q| j        dk    r6| j        d
         | _        | j        d
         | _        | j        d
         | _        | S )a  Fit the baseline classifier.

        Parameters
        ----------
        X : array-like of shape (n_samples, n_features)
            Training data.

        y : array-like of shape (n_samples,) or (n_samples, n_outputs)
            Target values.

        sample_weight : array-like of shape (n_samples,), default=None
            Sample weights.

        Returns
        -------
        self : object
            Returns the instance itself.
        Tskip_check_arrayr   zA local copy of the target data has been converted to a numpy array. Predicting on sparse target data with the uniform strategy would not save memory and would be slower.r   r   Nr   MConstant target value has to be specified when the constant strategy is used.r   0Constant target value should have shape (%d, 1).c              3   <   K   | ]}         d          |k    V  dS )r   N ).0cr   ks     r'   	<genexpr>z&DummyClassifier.fit.<locals>.<genexpr>   s0      II18A;q>Q.IIIIIIr)   zrThe constant target value must be present in the training data. You provided constant={}. Possible values are: {}.)r   r"   	_strategyspissparsetoarraywarningswarnUserWarningsparse_output_npasarray
atleast_1dndimreshapeshape
n_outputs_r   r   r   
ValueErrorr   classes_
n_classes_class_prior_rangeanyformattolist)r&   Xysample_weighterr_msgr   r7   s        @@r'   fitzDummyClassifier.fit   sr   ( 	dA5555>Y&&2;q>>&		AM+
    !k!nn" 	!
1Aa  A6Q;;
1g&&A'!*1%%%$0BBM>Z''}$ :  
 :bmDM&B&BGLL>!$77$J/*  
 ?Q}?
 ?
;): >Z''4?++ . .IIIIIa8HIIIII 
.3396 M4=+;+B+B+D+D4 4  %W---
. ?a"oa0DO M!,DM $ 1! 4Dr)   c                 J  	
 t          |            t          |          	t          | j                  | j        | j        | j        | j        }| j        dk    rggg|g}| j	        dk    r#| 
                    |          
| j        dk    r
g
| j        rpd}| j	        dv rd D             n?| j	        dk    r}n1| j	        dk    rt          d          | j	        dk    rd	 |D             t          	|| j                  }n| j	        dv r7t          j        fd
t!          | j                  D             	dg          }n| j	        dk    r9t          j        
fdt!          | j                  D                       j        }no| j	        dk    r=	fdt!          | j                  D             }t          j        |          j        }n'| j	        dk    rt          j        | j        	df          }| j        dk    rt          j        |          }|S )a;  Perform classification on test vectors X.

        Parameters
        ----------
        X : array-like of shape (n_samples, n_features)
            Test data.

        Returns
        -------
        y : array-like of shape (n_samples,) or (n_samples, n_outputs)
            Predicted target values for X.
        r   r   N)r   r   c                 \    g | ])}t          j        |                                g          *S r4   )rA   arrayargmax)r5   cps     r'   
<listcomp>z+DummyClassifier.predict.<locals>.<listcomp>#  s,    KKKBHbiikk]33KKKr)   r   zCSparse target prediction is not supported with the uniform strategyr   c                 8    g | ]}t          j        |g          S r4   )rA   rW   )r5   r6   s     r'   rZ   z+DummyClassifier.predict.<locals>.<listcomp>/  s"    <<<aBHaSMM<<<r)   c                 \    g | ](}|         |                                                   )S r4   rX   )r5   r7   rK   rI   s     r'   rZ   z+DummyClassifier.predict.<locals>.<listcomp>5  sB        !LO$:$:$<$<=  r)   c                 `    g | ]*}|         |                              d                    +S )r   axisr]   )r5   r7   rI   probas     r'   rZ   z+DummyClassifier.predict.<locals>.<listcomp>>  sD        !E!HOOO$;$;<  r)   c                 b    g | ]+}|                              |                             ,S )size)randint)r5   r7   rI   rJ   	n_samplesrss     r'   rZ   z+DummyClassifier.predict.<locals>.<listcomp>E  sF        QK

:a=y
 I IJ  r)   )r   r   r   r   rJ   rI   rK   r   rG   r9   predict_probar@   rH   r   rA   tilerL   vstackTravel)r&   rP   r   
class_probrQ   retrK   rI   rJ   rf   ra   rg   s         @@@@@@r'   predictzDummyClassifier.predict   s    	 !OO	 122_
=(=?a$J zH(>L zH>\))&&q))E!## /	 J~!;;;KKlKKK<//)

9,, :  
 :--<<8<<<"9h
DDUVVAA~!;;;G    !&t!7!7   N  <//I    !&t!7!7   
   9,,      "4?33   IcNN$:--GDMIq>::!##HQKKr)   c                 ,   t          |            t          |          }t          | j                  }| j        }| j        }| j        }| j        }| j        dk    r|g}|g}|g}|g}g }t          | j                  D ]}	| j
        dk    rM||	                                         }
t          j        |||	         ft          j                  }d|dd|
f<   n| j
        dk    r t          j        |df          ||	         z  }n| j
        dk    r>|                    d||	         |          }|                    t          j                  }n| j
        d	k    r4t          j        |||	         ft          j                  }|||	         z  }n`| j
        d
k    rUt          j        ||	         ||	         k              }
t          j        |||	         ft          j                  }d|dd|
f<   |                    |           | j        dk    r|d         }|S )a  
        Return probability estimates for the test vectors X.

        Parameters
        ----------
        X : array-like of shape (n_samples, n_features)
            Test data.

        Returns
        -------
        P : ndarray of shape (n_samples, n_classes) or list of such arrays
            Returns the probability of the sample for each class in
            the model, where classes are ordered arithmetically, for each
            output.
        r   r   dtype      ?Nr   r   rc   r   r   r   )r   r   r   r   rJ   rI   rK   r   rG   rL   r9   rX   rA   zerosfloat64onesmultinomialastypewhereappend)r&   rP   rf   rg   rJ   rI   rK   r   Pr7   indouts               r'   rh   zDummyClassifier.predict_probaS  s     	 !OO	 122_
=(=?a$J zH(>L zHt'' 	 	A~00"1o,,..h	:a=9LLL!AAAsF7**gy!n--Q?<//nnQQinHHjj,,9,,gy*Q-8
KKKz!}$:--hx{hqk9::h	:a=9LLL!AAAsFHHSMMMM?a!Ar)   c                     |                      |          }| j        dk    rt          j        |          S d |D             S )a  
        Return log probability estimates for the test vectors X.

        Parameters
        ----------
        X : {array-like, object with finite length or shape}
            Training data.

        Returns
        -------
        P : ndarray of shape (n_samples, n_classes) or list of such arrays
            Returns the log probability of the sample for each class in
            the model, where classes are ordered arithmetically for each
            output.
        r   c                 6    g | ]}t          j        |          S r4   )rA   log)r5   ps     r'   rZ   z5DummyClassifier.predict_log_proba.<locals>.<listcomp>  s     ---!BF1II---r)   )rh   rG   rA   r   )r&   rP   ra   s      r'   predict_log_probaz!DummyClassifier.predict_log_proba  sF      ""1%%?a6%== --u----r)   c                     t                                                      }d|j        _        d|j        _        d|_        |S NT)super__sklearn_tags__
input_tagssparseclassifier_tags
poor_scoreno_validationr&   tags	__class__s     r'   r   z DummyClassifier.__sklearn_tags__  s:    ww''))!%*.'!r)   c                     |$t          j        t          |          df          }t                                          |||          S )ak  Return the mean accuracy on the given test data and labels.

        In multi-label classification, this is the subset accuracy
        which is a harsh metric since you require for each sample that
        each label set be correctly predicted.

        Parameters
        ----------
        X : None or array-like of shape (n_samples, n_features)
            Test samples. Passing None as test samples gives the same result
            as passing real test samples, since DummyClassifier
            operates independently of the sampled observations.

        y : array-like of shape (n_samples,) or (n_samples, n_outputs)
            True labels for X.

        sample_weight : array-like of shape (n_samples,), default=None
            Sample weights.

        Returns
        -------
        score : float
            Mean accuracy of self.predict(X) w.r.t. y.
        Nr   rF   rA   rt   lenr   scorer&   rP   rQ   rR   r   s       r'   r   zDummyClassifier.score  s@    2 9A{+++Aww}}Q=111r)   r%   )__name__
__module____qualname____doc__r   r   strr#   dict__annotations__r(   r
   rT   ro   rh   r   r   r   __classcell__r   s   @r'   r   r   "   s9        o of JVVVWW
 ((sL$7$ $D    $+ ! ! ! ! !
 \555X X X 65XtU U Un= = =~. . .,    2 2 2 2 2 2 2 2 2 2r)   r   c                        e Zd ZU dZ eh d          g eeddd          dg eeddd          d	dgd
Zee	d<   dddddZ
 ed          dd            ZddZ fdZd fd	Z xZS )DummyRegressora  Regressor that makes predictions using simple rules.

    This regressor is useful as a simple baseline to compare with other
    (real) regressors. Do not use it for real problems.

    Read more in the :ref:`User Guide <dummy_estimators>`.

    .. versionadded:: 0.13

    Parameters
    ----------
    strategy : {"mean", "median", "quantile", "constant"}, default="mean"
        Strategy to use to generate predictions.

        * "mean": always predicts the mean of the training set
        * "median": always predicts the median of the training set
        * "quantile": always predicts a specified quantile of the training set,
          provided with the quantile parameter.
        * "constant": always predicts a constant value that is provided by
          the user.

    constant : int or float or array-like of shape (n_outputs,), default=None
        The explicit constant as predicted by the "constant" strategy. This
        parameter is useful only for the "constant" strategy.

    quantile : float in [0.0, 1.0], default=None
        The quantile to predict using the "quantile" strategy. A quantile of
        0.5 corresponds to the median, while 0.0 to the minimum and 1.0 to the
        maximum.

    Attributes
    ----------
    constant_ : ndarray of shape (1, n_outputs)
        Mean or median or quantile of the training targets or constant value
        given by the user.

    n_features_in_ : int
        Number of features seen during :term:`fit`.

    feature_names_in_ : ndarray of shape (`n_features_in_`,)
        Names of features seen during :term:`fit`. Defined only when `X` has
        feature names that are all strings.

    n_outputs_ : int
        Number of outputs.

    See Also
    --------
    DummyClassifier: Classifier that makes predictions using simple rules.

    Examples
    --------
    >>> import numpy as np
    >>> from sklearn.dummy import DummyRegressor
    >>> X = np.array([1.0, 2.0, 3.0, 4.0])
    >>> y = np.array([2.0, 3.0, 5.0, 10.0])
    >>> dummy_regr = DummyRegressor(strategy="mean")
    >>> dummy_regr.fit(X, y)
    DummyRegressor()
    >>> dummy_regr.predict(X)
    array([5., 5., 5., 5.])
    >>> dummy_regr.score(X, y)
    0.0
    >   meanmedianr   quantileg        rs   both)closedNneitherr    )r"   r   r   r#   r   r"   r   r   c                0    || _         || _        || _        d S r%   r   )r&   r"   r   r   s       r'   r(   zDummyRegressor.__init__  s       r)   Tr*   c                 j   t          | |d           t          dd          t                    dk    rt          d          j        dk    rt          j        d	          j        d         | _        t          |           t          |          | j        dk    rt          j        d          | _        nL| j        dk    rFt          j        d          | _        n"fdt          | j                  D             | _        n| j        dk    rg| j        t          d          | j        dz  t          j        d          | _        nfdt          | j                  D             | _        n| j        dk    r~| j        t'          d          t          | j        g ddd          | _        | j        dk    r>| j        j        d         j        d         k    rt          dj        d         z            t          j        | j        d          | _        | S )a  Fit the baseline regressor.

        Parameters
        ----------
        X : array-like of shape (n_samples, n_features)
            Training data.

        y : array-like of shape (n_samples,) or (n_samples, n_outputs)
            Target values.

        sample_weight : array-like of shape (n_samples,), default=None
            Sample weights.

        Returns
        -------
        self : object
            Fitted estimator.
        Tr-   FrQ   )	ensure_2d
input_namer   zy must not be empty.r   r/   Nr   )r`   weightsr   r_   c                 H    g | ]}t          d d |f         d          S )Ng      I@
percentiler   )r5   r7   rR   rQ   s     r'   rZ   z&DummyRegressor.fit.<locals>.<listcomp>H  sD     " " " )111a4-DQQQ" " "r)   r   z^When using `strategy='quantile', you have to specify the desired quantile in the range [0, 1].g      Y@)r`   qc                 H    g | ]}t          d d |f                   S )Nr   r   )r5   r7   r   rR   rQ   s     r'   rZ   z&DummyRegressor.fit.<locals>.<listcomp>W  sD     " " " )111a4-JWWW" " "r)   r   r1   )csrcsccoo)accept_sparser   ensure_min_samplesr2   )r   r0   )r   r   r   rH   rD   rA   rE   rF   rG   r   r   r"   average	constant_r   rL   r   r   r   	TypeError)r&   rP   rQ   rR   r   s     ``@r'   rT   zDummyRegressor.fit  s   ( 	dA5555Us;;;q66Q;;34446Q;;
1g&&A'!*1m444$0BBM=F""Z=IIIDNN]h&&$!#11!5!5!5" " " " ""4?33" " "
 ]j((}$ 4   .J$!#qqJ!G!G!G" " " " " ""4?33" " "
 ]j((}$:  
 )333#$	  DN !##(<Q(?171:(M(M FQRS   DNG<<r)   Fc                 h   t          |            t          |          }t          j        || j        f| j        t          j        | j                  j                  }t          j        || j        f          }| j        dk    r(t          j	        |          }t          j	        |          }|r||fn|S )a  Perform classification on test vectors X.

        Parameters
        ----------
        X : array-like of shape (n_samples, n_features)
            Test data.

        return_std : bool, default=False
            Whether to return the standard deviation of posterior prediction.
            All zeros in this case.

            .. versionadded:: 0.20

        Returns
        -------
        y : array-like of shape (n_samples,) or (n_samples, n_outputs)
            Predicted target values for X.

        y_std : array-like of shape (n_samples,) or (n_samples, n_outputs)
            Standard deviation of predictive distribution of query points.
        rq   r   )
r   r   rA   fullrG   r   rW   rr   rt   rl   )r&   rP   
return_stdrf   rQ   y_stds         r'   ro   zDummyRegressor.predictr  s    , 	 OO	G(N(4>**0
 
 

 )T_566?aAHUOOE'.5zzQ.r)   c                     t                                                      }d|j        _        d|j        _        d|_        |S r   )r   r   r   r   regressor_tagsr   r   r   s     r'   r   zDummyRegressor.__sklearn_tags__  s:    ww''))!%)-&!r)   c                     |$t          j        t          |          df          }t                                          |||          S )a  Return the coefficient of determination R^2 of the prediction.

        The coefficient R^2 is defined as `(1 - u/v)`, where `u` is the
        residual sum of squares `((y_true - y_pred) ** 2).sum()` and `v` is the
        total sum of squares `((y_true - y_true.mean()) ** 2).sum()`. The best
        possible score is 1.0 and it can be negative (because the model can be
        arbitrarily worse). A constant model that always predicts the expected
        value of y, disregarding the input features, would get a R^2 score of
        0.0.

        Parameters
        ----------
        X : None or array-like of shape (n_samples, n_features)
            Test samples. Passing None as test samples gives the same result
            as passing real test samples, since `DummyRegressor`
            operates independently of the sampled observations.

        y : array-like of shape (n_samples,) or (n_samples, n_outputs)
            True values for X.

        sample_weight : array-like of shape (n_samples,), default=None
            Sample weights.

        Returns
        -------
        score : float
            R^2 of `self.predict(X)` w.r.t. y.
        Nr   r   r   r   s       r'   r   zDummyRegressor.score  s@    : 9A{+++Aww}}Q=111r)   r%   )F)r   r   r   r   r   r   r   r#   r   r   r(   r
   rT   ro   r   r   r   r   s   @r'   r   r     s@        ? ?D  Z J J JKKLXdCV<<<dCHT4i888
$ $D    $*D4 ! ! ! ! !
 \555Q Q Q 65Qf$/ $/ $/ $/L    2 2 2 2 2 2 2 2 2 2r)   r   )$r   r=   numbersr   r   numpyrA   scipy.sparser   r:   baser   r   r   r	   r
   utilsr   utils._param_validationr   r   utils.multiclassr   utils.randomr   utils.statsr   utils.validationr   r   r   r   r   r   r   r   r4   r)   r'   <module>r      s   < <
  " " " " " " " "                        & % % % % % 9 9 9 9 9 9 9 9 0 0 0 0 0 0 , , , , , , - - - - - -               h2 h2 h2 h2 h2& h2 h2 h2Vq2 q2 q2 q2 q2%~} q2 q2 q2 q2 q2r)   