
    0Ph                     2   d dl Z d dlZd dlmZ d dlmZ d dlmZ d dlZ	d dl
mZ d dlmZ ddlmZmZmZ ddlmZ dd	lmZmZ dd
lmZmZ ddlmZ ddlmZ ddlm Z m!Z!m"Z"m#Z#m$Z$ d Z%d Z& G d dee          Z' G d de'          Z( G d dee          Z)dS )    N)Counter)partial)Callable)sparse   )BaseEstimatorTransformerMixin_fit_context)	_get_mask)is_pandas_nais_scalar_nan)MissingValues
StrOptions)_mode)_get_median)FLOAT_DTYPES_check_feature_names_in_check_n_featurescheck_is_fittedvalidate_datac                     t          |          rd S | j        j        dv rOt          |t          j                  s7t          d                    | j        t          |                              d S d S )N)fiuzn'X' and 'missing_values' types are expected to be both numerical. Got X.dtype={} and  type(missing_values)={}.)	r   dtypekind
isinstancenumbersReal
ValueErrorformattype)Xmissing_valuess     T/var/www/html/test/jupyter/venv/lib/python3.11/site-packages/sklearn/impute/_base.py_check_inputs_dtyper&      su    N## w|&&z.',/W/W&((.qw^8L8L(M(M
 
 	
 '&&&    c                    | j         dk    r| j        t          k    r^t          |           }|                    d          d         d         t          fd|                                D                       }n0t          |           }|d         d         }|d         d         nd}ddk    r|dk    rt          j	        S |k     r|S |k    r|S |k    rt          ||          S dS )zCompute the most frequent value in a 1d array extended with
    [extra_value] * n_repeat, where extra_value is assumed to be not part
    of the array.r      c              3   .   K   | ]\  }}|k    |V  d S N ).0valuecountmost_frequent_counts      r%   	<genexpr>z!_most_frequent.<locals>.<genexpr>5   s?       & & E5/// ////& &r'   N)
sizer   objectr   most_commonminitemsr   npnan)arrayextra_valuen_repeatcountermost_frequent_valuemoder0   s         @r%   _most_frequentr?   )   s#   
 zA~~;&   ennG")"5"5a"8"8";A">"% & & & &$+MMOO& & & # # <<D"&q'!*"&q'!* aHMMv	x	'	'	x	'	'""		(	(&444 
)	(r'   c                        e Zd ZU dZ e            gdgdgdZeed<   ej	        ddddZ
d Zd Zd	 Zd
 Z fdZ xZS )_BaseImputerzYBase class for all imputers.

    It adds automatically support for `add_indicator`.
    booleanr$   add_indicatorkeep_empty_features_parameter_constraintsFc                0    || _         || _        || _        d S r+   rC   )selfr$   rD   rE   s       r%   __init__z_BaseImputer.__init__Z   s"     -*#6   r'   c                     | j         r9t          | j        d          | _        | j                            |d           dS d| _        dS )zFit a MissingIndicator.F)r$   error_on_newT)precomputedN)rD   MissingIndicatorr$   
indicator__fitrH   r#   s     r%   _fit_indicatorz_BaseImputer._fit_indicatora   sY     	#.#2  DO O   55555"DOOOr'   c                     | j         r9t          | d          st          d          | j                            |          S dS )zCompute the indicator mask.'

        Note that X must be the original data as passed to the imputer before
        any imputation, since imputation may be done inplace in some cases.
        rN   z<Make sure to call _fit_indicator before _transform_indicatorN)rD   hasattrr    rN   	transformrP   s     r%   _transform_indicatorz!_BaseImputer._transform_indicatork   sX      	04..  R   ?,,Q///	0 	0r'   c                     | j         s|S t          j        |          r!t          t          j        |j                  }nt          j        }|t          d           |||f          S )z1Concatenate indicator mask with the imputed data.)r!   Nz}Data from the missing indicator are not provided. Call _fit_indicator and _transform_indicator in the imputer implementation.)rD   spissparser   hstackr!   r7   r    )rH   	X_imputedX_indicatorrY   s       r%   _concatenate_indicatorz#_BaseImputer._concatenate_indicatorx   s{    ! 	;y!! 	 RYy/?@@@FFYF"   vy+.///r'   c                 t    | j         s|S | j                            |          }t          j        ||g          S r+   )rD   rN   get_feature_names_outr7   concatenate)rH   namesinput_featuresindicator_namess       r%   (_concatenate_indicator_feature_names_outz5_BaseImputer._concatenate_indicator_feature_names_out   s<    ! 	L/??OO~uo6777r'   c                     t                                                      }t          | j                  |j        _        |S r+   )super__sklearn_tags__r   r$   
input_tags	allow_nanrH   tags	__class__s     r%   rf   z_BaseImputer.__sklearn_tags__   s2    ww''))$1$2E$F$F!r'   )__name__
__module____qualname____doc__r   rF   dict__annotations__r7   r8   rI   rQ   rU   r\   rc   rf   __classcell__rk   s   @r%   rA   rA   N   s           )=??+# ){$ $D    !#eQV7 7 7 7 7# # #0 0 00 0 0*8 8 8        r'   rA   c                        e Zd ZU dZi ej         eh d          egddgdZee	d<   e
j        ddd	d
d
d fd
Zd Z ed	          dd            Z fdZ fdZ fdZd Z fdZddZ xZS )SimpleImputeraq  Univariate imputer for completing missing values with simple strategies.

    Replace missing values using a descriptive statistic (e.g. mean, median, or
    most frequent) along each column, or using a constant value.

    Read more in the :ref:`User Guide <impute>`.

    .. versionadded:: 0.20
       `SimpleImputer` replaces the previous `sklearn.preprocessing.Imputer`
       estimator which is now removed.

    Parameters
    ----------
    missing_values : int, float, str, np.nan, None or pandas.NA, default=np.nan
        The placeholder for the missing values. All occurrences of
        `missing_values` will be imputed. For pandas' dataframes with
        nullable integer dtypes with missing values, `missing_values`
        can be set to either `np.nan` or `pd.NA`.

    strategy : str or Callable, default='mean'
        The imputation strategy.

        - If "mean", then replace missing values using the mean along
          each column. Can only be used with numeric data.
        - If "median", then replace missing values using the median along
          each column. Can only be used with numeric data.
        - If "most_frequent", then replace missing using the most frequent
          value along each column. Can be used with strings or numeric data.
          If there is more than one such value, only the smallest is returned.
        - If "constant", then replace missing values with fill_value. Can be
          used with strings or numeric data.
        - If an instance of Callable, then replace missing values using the
          scalar statistic returned by running the callable over a dense 1d
          array containing non-missing values of each column.

        .. versionadded:: 0.20
           strategy="constant" for fixed value imputation.

        .. versionadded:: 1.5
           strategy=callable for custom value imputation.

    fill_value : str or numerical value, default=None
        When strategy == "constant", `fill_value` is used to replace all
        occurrences of missing_values. For string or object data types,
        `fill_value` must be a string.
        If `None`, `fill_value` will be 0 when imputing numerical
        data and "missing_value" for strings or object data types.

    copy : bool, default=True
        If True, a copy of X will be created. If False, imputation will
        be done in-place whenever possible. Note that, in the following cases,
        a new copy will always be made, even if `copy=False`:

        - If `X` is not an array of floating values;
        - If `X` is encoded as a CSR matrix;
        - If `add_indicator=True`.

    add_indicator : bool, default=False
        If True, a :class:`MissingIndicator` transform will stack onto output
        of the imputer's transform. This allows a predictive estimator
        to account for missingness despite imputation. If a feature has no
        missing values at fit/train time, the feature won't appear on
        the missing indicator even if there are missing values at
        transform/test time.

    keep_empty_features : bool, default=False
        If True, features that consist exclusively of missing values when
        `fit` is called are returned in results when `transform` is called.
        The imputed value is always `0` except when `strategy="constant"`
        in which case `fill_value` will be used instead.

        .. versionadded:: 1.2

        .. versionchanged:: 1.6
            Currently, when `keep_empty_feature=False` and `strategy="constant"`,
            empty features are not dropped. This behaviour will change in version
            1.8. Set `keep_empty_feature=True` to preserve this behaviour.

    Attributes
    ----------
    statistics_ : array of shape (n_features,)
        The imputation fill value for each feature.
        Computing statistics can result in `np.nan` values.
        During :meth:`transform`, features corresponding to `np.nan`
        statistics will be discarded.

    indicator_ : :class:`~sklearn.impute.MissingIndicator`
        Indicator used to add binary indicators for missing values.
        `None` if `add_indicator=False`.

    n_features_in_ : int
        Number of features seen during :term:`fit`.

        .. versionadded:: 0.24

    feature_names_in_ : ndarray of shape (`n_features_in_`,)
        Names of features seen during :term:`fit`. Defined only when `X`
        has feature names that are all strings.

        .. versionadded:: 1.0

    See Also
    --------
    IterativeImputer : Multivariate imputer that estimates values to impute for
        each feature with missing values from all the others.
    KNNImputer : Multivariate imputer that estimates missing features using
        nearest samples.

    Notes
    -----
    Columns which only contained missing values at :meth:`fit` are discarded
    upon :meth:`transform` if strategy is not `"constant"`.

    In a prediction context, simple imputation usually performs poorly when
    associated with a weak learner. However, with a powerful learner, it can
    lead to as good or better performance than complex imputation such as
    :class:`~sklearn.impute.IterativeImputer` or :class:`~sklearn.impute.KNNImputer`.

    Examples
    --------
    >>> import numpy as np
    >>> from sklearn.impute import SimpleImputer
    >>> imp_mean = SimpleImputer(missing_values=np.nan, strategy='mean')
    >>> imp_mean.fit([[7, 2, 3], [4, np.nan, 6], [10, 5, 9]])
    SimpleImputer()
    >>> X = [[np.nan, 2, 3], [4, np.nan, 6], [10, np.nan, 9]]
    >>> print(imp_mean.transform(X))
    [[ 7.   2.   3. ]
     [ 4.   3.5  6. ]
     [10.   3.5  9. ]]

    For a more detailed example see
    :ref:`sphx_glr_auto_examples_impute_plot_missing_values.py`.
    >   meanmedianconstantmost_frequentno_validationrB   )strategy
fill_valuecopyrF   rv   NTF)r$   r{   r|   r}   rD   rE   c                z    t                                          |||           || _        || _        || _        d S )NrC   )re   rI   r{   r|   r}   )rH   r$   r{   r|   r}   rD   rE   rk   s          r%   rI   zSimpleImputer.__init__,  sJ     	)' 3 	 	
 	
 	

 !$			r'   c           
      0   | j         dv r9t          |t                    r!t          d |D                       rt          }n
d }nt
          }|s| j        j        dk    r| j        }t          | j	                  st          | j	                  rd}nd}	 t          | ||d||sdnd || j                  }nP# t          $ rC}dt          |          v r+t          d	                    | j         |                    }|d |d }~ww xY w|r|j        | _        t#          || j	                   |j        j        d
vr't          d                    |j                            t%          j        |          r| j	        dk    rt          d          | j         dk    r|r2| j        +t+          | j                  }d| j        d|d|j        d}n$|s| j        j        }d|d|j        d}n|j        }t/          j        ||j        d          st          |          |S )N)ry   rx   c              3   J   K   | ]}|D ]}t          |t                    V  d S r+   )r   str)r-   rowelems      r%   r1   z0SimpleImputer._validate_input.<locals>.<genexpr>E  sQ       + +*-s+ +7;
4%%+ + + + + + +r'   O	allow-nanTcsc)resetaccept_sparser   force_writeableensure_all_finiter}   zcould not convertz0Cannot use {} strategy with non-numeric data:
{}r   r   r   r   zSimpleImputer does not support data with dtype {0}. Please provide either a numeric array (with a floating point or integer dtype) or categorical data represented either as an array with integer dtype or an array of string values with an object dtype.r   dImputation not possible when missing_values == 0 and input is sparse. Provide a dense array instead.rx   zfill_value=z
 (of type z+) cannot be cast to the input data that is z2. Make sure that both dtypes are of the same kind.z%The dtype of the filling value (i.e. z\. Make sure that the dtypes of the input data is of the same kind between fit and transform.	same_kind)casting)r{   r   listanyr3   r   
_fit_dtyper   r   r$   r   r   r}   r    r   r!   r   r&   rW   rX   r|   r"   statistics_r7   can_cast)	rH   r#   in_fitr   r   venew_vefill_value_dtypeerr_msgs	            r%   _validate_inputzSimpleImputer._validate_input?  s   =999
 !T"" s + +12+ + + ( (   E 	$$/.#55OE+,, 	%d>Q0R0R 	% + $	#,2 <"3Y	 	 	AA  		 		 		"c"gg--#GNNr  
 $&		  	&gDOAt23337<333(
 )/qw   ;q>> 	d1Q66 !   =J&& +$/5#'#8#8 =$/ = =?O = =@A= = = 
  +#'#3#9 )<L ) )@A) ) )  $%7  ;/+NNN * )))s    B2 2
C?<>C::C?prefer_skip_nested_validationc                 >   |                      |d          }| j        |j        j        dv rd}n
d}n| j        }t	          j        |          r(|                     || j        | j        |          | _	        n'| 
                    || j        | j        |          | _	        | S )a  Fit the imputer on `X`.

        Parameters
        ----------
        X : {array-like, sparse matrix}, shape (n_samples, n_features)
            Input data, where `n_samples` is the number of samples and
            `n_features` is the number of features.

        y : Ignored
            Not used, present here for API consistency by convention.

        Returns
        -------
        self : object
            Fitted estimator.
        Tr   N)r   r   r   r   missing_value)r   r|   r   r   rW   rX   _sparse_fitr{   r$   r   
_dense_fit)rH   r#   yr|   s       r%   fitzSimpleImputer.fit  s    $   4 00 ?"w|..

,

J;q>> 	#//4=$"5z   D  $4=$"5z   D r'   c                    t          ||          j        }|j        d         t          j        |j                  z
  }t          j        |j        d                   }|dk    rk| j        sMt          fdt          j        d                   D                       rt          j        dt                     |                    |           nlt          |j        d                   D ]P}|j        |j        |         |j        |dz                     }	||j        |         |j        |dz                     }
|	|
          }	t          |	d          }|	|          }	|                                }||         |z   }t          |	          dk    r| j        rd||<   |dk    r7|	j        |z   }|dk    rt          j        n|	                                |z  ||<   |dk    rt%          |	|          ||<   |dk    rt'          |	d|          ||<   #t)          |t*                    r|                     |	          ||<   Rt/                                                     |S )	z#Fit the transformer on sparse data.r   r)   rx   c                 L    g | ] }t          d d |f         j                  !S r+   )alldata)r-   r   missing_masks     r%   
<listcomp>z-SimpleImputer._sparse_fit.<locals>.<listcomp>  s1    TTT!\!!!Q$',--TTTr'   Currently, when `keep_empty_feature=False` and `strategy="constant"`, empty features are not dropped. This behaviour will change in version 1.8. Set `keep_empty_feature=True` to preserve this behaviour.rv   rw   ry   )r   r   shaper7   diffindptremptyrE   r   rangewarningswarnFutureWarningfillsumlenr2   r8   r   r?   r   r   r{   re   rQ   )rH   r#   r{   r$   r|   	mask_datan_implicit_zeros
statisticsr   columnmask_column
mask_zerosn_explicit_zerosn_zerossr   rk   s                  @r%   r   zSimpleImputer._sparse_fit  se    N33 %	71:(9(99Xagaj))
z!! + 	TTTTu\=OPQ=R7S7STTT1 1 	 L "   OOJ''''171:&& > >ahq1uo =>'ahq1uo(EF- 'vq11
,#->>#3#3 *1-0@@v;;!##(@#$%JqMM6))"K'123q&&fjjllQ>N
1!X--(3FG(D(D
1!_44(6vq'(J(J
1#Hh77 >(,f(=(=
1|,,,r'   c                    t          ||          }t          j        ||          }t                                          |           |dk    rwt
          j                            |d          }t
          j                            |          }| j        rdnt
          j	        |t
          j        
                    |          <   |S |dk    rwt
          j                            |d          }	t
          j                            |	          }
| j        rdnt
          j	        |
t
          j                            |	          <   |
S |dk    r(|                                }|                                }|j        j        dk    r't          j        |j        d         t$                    }nt          j        |j        d                   }t'          t)          |d	d	         |d	d	                             D ]z\  }\  }}t          j        |                              t.                    }||         }t1          |          dk    r| j        rd||<   \t3          |t
          j	        d          ||<   {|S |d
k    r| j        sTt          j
        |                              d                                          rt9          j        dt<                     t          j        |j        d         ||j                  S tA          |tB                    rrt          j        |j        d                   }tE          |j        d                   D ]6}| #                    |d	d	|f         $                                          ||<   7|S d	S )z"Fit the transformer on dense data.)maskrv   r   axisrw   ry   r   r   Nrx   r   r)   )%r   mamasked_arrayre   rQ   r7   rv   getdatarE   r8   getmaskrw   getmaskarray	transposer   r   r   r   r3   	enumerateziplogical_notastypeboolr   r?   r   r   r   r   r   fullr   r   r   r{   
compressed)rH   r#   r{   r$   r|   r   masked_Xmean_maskedrv   median_maskedrw   r   ry   r   r   row_maskr   rk   s                    r%   r   zSimpleImputer._dense_fit  s/    N33?1<888|,,, v%**XA*66K5==--D484L/XqqRTRXD{++,K !!ELLL::MU]]=11F-926 25%%m445 M (( A))++Dw|s"" "6 B B B " 4 4&/AaaaD$qqq'0B0B&C&C F F"?C>(33::4@@(ms88q==T%=='(M!$$'5c261'E'EM!$$   ## + 
80D0D0H0Ha0H0P0P0T0T0V0V L "   7171:zAAAA (++ 	(."344J8>!,-- K K $hqqq!tn.G.G.I.I J J
1		 	r'   c                    t          |            |                     |d          }| j        }|j        d         |j        d         k    r/t	          d|j        d         | j        j        d         fz            t          || j                  }| j        dk    s| j        r|}d}nt          |t          j
                  }t          j        |          }||         }t          j        |          }|                                rnt          j        |j        d                   |         }t          | d          r| j        |         }t#          j        d	| d
| j         d           |dd|f         }t'          j        |          r| j        dk    rt	          d          ||j        }	nt          |j        | j                  }	t          j        t          j        t/          |j                  dz
  t2                    t          j        |j                            |	         }
||
                             |j        d          |j        |	<   np||}n|dd|f         }t          j        |d          }t          j        ||          }t          j        |                                          ddd         }|||<   tA                      !                    |          }tA                      "                    ||          S )ah  Impute all missing values in `X`.

        Parameters
        ----------
        X : {array-like, sparse matrix}, shape (n_samples, n_features)
            The input data to complete.

        Returns
        -------
        X_imputed : {ndarray, sparse matrix} of shape                 (n_samples, n_features_out)
            `X` with imputed values.
        Fr   r)   r   z)X has %d features per sample, expected %drx   Nfeature_names_in_z/Skipping features without any observed values: zI. At least one non-missing value is needed for imputation with strategy='z'.r   r   )r}   r   )#r   r   r   r   r    r   r$   r{   rE   r7   r8   r   flatnonzeror   arangerS   r   r   r   rW   rX   r   repeatr   r   intr   r   r   r   wherer   re   rU   r\   )rH   r#   r   r   valid_statisticsvalid_statistics_indexesinvalid_mask
valid_maskinvalid_featuresr   indexesmask_valid_features	n_missingvaluescoordinatesr[   rk   s                   r%   rT   zSimpleImputer.transformO  s    	  5 11%
71:)!,,,;71:t/5a89:   !D$788 =J&&$*B&)'+$$ %Z88L55J)*5')~j'A'A$!! 
3#%9QWQZ#8#8#F 4!455 P'+'=>N'O$H(H H6:mH H H  
 aaa112 ;q>> 	$"a'' %   ,3',DD$QVT-@AAD)Ic!(mma/s;;;RWQX=N=N   08??e?TTt (/&2##&21116N3N&O#2;;;IY/;;F(#6#@#@#B#BCCDDbDIK#AkNgg22<@@ww--a===r'   c                 
   t          |            | j        st          d| j         d          t          | j        j                  }|j        d         |z
  }|ddd|f                                         }|dd|df                             t                    }t          | j
                  }|j        d         |f}t          j        |          }||dd| j        j        f<   |                    t                    }	d\  }
}|
t          |j                  k     rZt          j        |dd|f                   s|j        |
         |dd|f<   |
dz  }
|dz  }n|dz  }|
t          |j                  k     Z| j        ||	<   |S )a=  Convert the data back to the original representation.

        Inverts the `transform` operation performed on an array.
        This operation can only be performed after :class:`SimpleImputer` is
        instantiated with `add_indicator=True`.

        Note that `inverse_transform` can only invert the transform in
        features that have binary indicators for missing values. If a feature
        has no missing values at `fit` time, the feature won't have a binary
        indicator, and the imputation done at `transform` time won't be
        inverted.

        .. versionadded:: 0.24

        Parameters
        ----------
        X : array-like of shape                 (n_samples, n_features + n_features_missing_indicator)
            The imputed data to be reverted to original data. It has to be
            an augmented array of imputed data and the missing indicator mask.

        Returns
        -------
        X_original : ndarray of shape (n_samples, n_features)
            The original `X` with missing values as it was prior
            to imputation.
        zr'inverse_transform' works only when 'SimpleImputer' is instantiated with 'add_indicator=True'. Got 'add_indicator=z
' instead.r)   Nr   )r   r   )r   rD   r    r   rN   	features_r   r}   r   r   r   r7   zerosTr   r$   )rH   r#   n_features_missingnon_empty_feature_countarray_imputedr   n_features_originalshape_original
X_original	full_maskimputed_idxoriginal_idxs               r%   inverse_transformzSimpleImputer.inverse_transform  s   8 	! 	 '+&8     !!:;;"#'!*/A"A!!!55556;;==34445<<TBB!$"233'!*&9:Xn--
3?
111do//0%%d++	$(!\C00006*QQQ_566 ".;ok.J
111l?+q !! C0000 !% 3
9r'   c                     t                                                      }d|j        _        t	          | j                  pt          | j                  |j        _        |S NT)re   rf   rg   r   r   r$   r   rh   ri   s     r%   rf   zSimpleImputer.__sklearn_tags__  sW    ww''))!%$01D$E$E %
J
 J
! r'   c                     t          | d           t          | |          }t          j        t	          | j        t          j                            }||         }|                     ||          S )  Get output feature names for transformation.

        Parameters
        ----------
        input_features : array-like of str or None, default=None
            Input features.

            - If `input_features` is `None`, then `feature_names_in_` is
              used as feature names in. If `feature_names_in_` is not defined,
              then the following input feature names are generated:
              `["x0", "x1", ..., "x(n_features_in_ - 1)"]`.
            - If `input_features` is an array-like, then `input_features` must
              match `feature_names_in_` if `feature_names_in_` is defined.

        Returns
        -------
        feature_names_out : ndarray of str objects
            Transformed feature names.
        n_features_in_)r   r   r7   r   r   r   r8   rc   )rH   ra   non_missing_maskr`   s       r%   r^   z#SimpleImputer.get_feature_names_out  sd    ( 	.///0~FF>)D4Dbf*M*MNN/0<<UNSSSr'   r+   )rl   rm   rn   ro   rA   rF   r   callablerp   rq   r7   r8   rI   r   r
   r   r   r   rT   r   rf   r^   rr   rs   s   @r%   ru   ru      s        E EN$

-$ JFFFGG
 &$ $ $D    v!      &_ _ _B \555& & & 65&P8 8 8 8 8tJ J J J JXU> U> U> U> U>n< < <|    T T T T T T T Tr'   ru   c                       e Zd ZU dZ e            g eddh          gd edh          gdgdZeed<   e	j
        ddddd	Zd
 Zd ZddZ ed          dd            Zd Z ed          dd            ZddZ fdZ xZS )rM   a  Binary indicators for missing values.

    Note that this component typically should not be used in a vanilla
    :class:`~sklearn.pipeline.Pipeline` consisting of transformers and a
    classifier, but rather could be added using a
    :class:`~sklearn.pipeline.FeatureUnion` or
    :class:`~sklearn.compose.ColumnTransformer`.

    Read more in the :ref:`User Guide <impute>`.

    .. versionadded:: 0.20

    Parameters
    ----------
    missing_values : int, float, str, np.nan or None, default=np.nan
        The placeholder for the missing values. All occurrences of
        `missing_values` will be imputed. For pandas' dataframes with
        nullable integer dtypes with missing values, `missing_values`
        should be set to `np.nan`, since `pd.NA` will be converted to `np.nan`.

    features : {'missing-only', 'all'}, default='missing-only'
        Whether the imputer mask should represent all or a subset of
        features.

        - If `'missing-only'` (default), the imputer mask will only represent
          features containing missing values during fit time.
        - If `'all'`, the imputer mask will represent all features.

    sparse : bool or 'auto', default='auto'
        Whether the imputer mask format should be sparse or dense.

        - If `'auto'` (default), the imputer mask will be of same type as
          input.
        - If `True`, the imputer mask will be a sparse matrix.
        - If `False`, the imputer mask will be a numpy array.

    error_on_new : bool, default=True
        If `True`, :meth:`transform` will raise an error when there are
        features with missing values that have no missing values in
        :meth:`fit`. This is applicable only when `features='missing-only'`.

    Attributes
    ----------
    features_ : ndarray of shape (n_missing_features,) or (n_features,)
        The features indices which will be returned when calling
        :meth:`transform`. They are computed during :meth:`fit`. If
        `features='all'`, `features_` is equal to `range(n_features)`.

    n_features_in_ : int
        Number of features seen during :term:`fit`.

        .. versionadded:: 0.24

    feature_names_in_ : ndarray of shape (`n_features_in_`,)
        Names of features seen during :term:`fit`. Defined only when `X`
        has feature names that are all strings.

        .. versionadded:: 1.0

    See Also
    --------
    SimpleImputer : Univariate imputation of missing values.
    IterativeImputer : Multivariate imputation of missing values.

    Examples
    --------
    >>> import numpy as np
    >>> from sklearn.impute import MissingIndicator
    >>> X1 = np.array([[np.nan, 1, 3],
    ...                [4, 0, np.nan],
    ...                [8, 1, 0]])
    >>> X2 = np.array([[5, 1, np.nan],
    ...                [np.nan, 2, 3],
    ...                [2, 4, 0]])
    >>> indicator = MissingIndicator()
    >>> indicator.fit(X1)
    MissingIndicator()
    >>> X2_tr = indicator.transform(X2)
    >>> X2_tr
    array([[False,  True],
           [ True, False],
           [False, False]])
    missing-onlyr   rB   autor$   featuresr   rK   rF   Tc                >    || _         || _        || _        || _        d S r+   r   )rH   r$   r   r   rK   s        r%   rI   zMissingIndicator.__init__c  s(     - (r'   c                    | j         st          || j                  }n|}t          j        |          rs|                                 | j        dk    r|                    d          }| j        du r|	                                }n}|j
        dk    r|                                }n]| j         st          || j                  }n|}| j        dk    r|                    d          }| j        du rt          j        |          }| j        dk    r t          j        |j        d                   }nt          j        |          }||fS )	a  Compute the imputer mask and the indices of the features
        containing missing values.

        Parameters
        ----------
        X : {ndarray, sparse matrix} of shape (n_samples, n_features)
            The input data with missing values. Note that `X` has been
            checked in :meth:`fit` and :meth:`transform` before to call this
            function.

        Returns
        -------
        imputer_mask : {ndarray, sparse matrix} of shape         (n_samples, n_features)
            The imputer mask of the original data.

        features_with_missing : ndarray of shape (n_features_with_missing)
            The features containing missing values.
        r   r   r   FcsrTr   r)   )_precomputedr   r$   rW   rX   eliminate_zerosr   getnnzr   toarrayr!   tocscr   
csc_matrixr7   r   r   r   )rH   r#   imputer_maskr   features_indicess        r%   _get_missing_features_infoz+MissingIndicator._get_missing_features_infop  sR   (   	$Q(;<<LLL;q>> 	;((***}..(//Q/77	{e##+3355$--+1133$ !(D,?@@ }..(,,!,44	{d""!}\::=E!!!y44!~i88---r'   c                 R   t          | j                  sd}nd}t          | ||dd |          }t          || j                   |j        j        dvr't          d                    |j                            t          j	        |          r| j        dk    rt          d          |S )	NTr   )r   r  )r   r   r   r   r   zMissingIndicator does not support data with dtype {0}. Please provide either a numeric array (with a floating point or integer dtype) or categorical data represented either as an array with integer dtype or an array of string values with an object dtype.r   zSSparse input with missing_values=0 is not supported. Provide a dense array instead.)
r   r$   r   r&   r   r   r    r!   rW   rX   )rH   r#   r   r   s       r%   r   z MissingIndicator._validate_input  s    T011 	, $ +(/
 
 
 	At23337<333(
 )/qw   ;q>> 	d1Q66 !   r'   NFc                 \   |r7t          |d          r|j        j        dk    st          d          d| _        nd| _        | j        s|                     |d          }nt          | |d           |j        d         | _        | 	                    |          }|d         | _
        |d	         S )
aO  Fit the transformer on `X`.

        Parameters
        ----------
        X : {array-like, sparse matrix} of shape (n_samples, n_features)
            Input data, where `n_samples` is the number of samples and
            `n_features` is the number of features.
            If `precomputed=True`, then `X` is a mask of the input data.

        precomputed : bool
            Whether the input data is a mask.

        Returns
        -------
        imputer_mask : {ndarray, sparse matrix} of shape (n_samples,         n_features)
            The imputer mask of the original data.
        r   b4precomputed is True but the input data is not a maskTFr   )r   r)   r   )rS   r   r   r    r  r   r   r   _n_featuresr  r   )rH   r#   r   rL   missing_features_infos        r%   rO   zMissingIndicator._fit  s    &  	&Aw'' YAGLC,?,? !WXXX $D %D   	3$$Qt$44AA dAT222271: $ ? ? B B.q1$Q''r'   r   c                 2    |                      ||           | S )a  Fit the transformer on `X`.

        Parameters
        ----------
        X : {array-like, sparse matrix} of shape (n_samples, n_features)
            Input data, where `n_samples` is the number of samples and
            `n_features` is the number of features.

        y : Ignored
            Not used, present for API consistency by convention.

        Returns
        -------
        self : object
            Fitted estimator.
        )rO   )rH   r#   r   s      r%   r   zMissingIndicator.fit  s    $ 			!Qr'   c                    t          |            | j        s|                     |d          }n/t          |d          r|j        j        dk    st          d          |                     |          \  }}| j        dk    rtt          j
        || j                  }| j        r-|j        dk    r"t          d                    |                    | j        j        | j        k     r|d	d	| j        f         }|S )
a  Generate missing values indicator for `X`.

        Parameters
        ----------
        X : {array-like, sparse matrix} of shape (n_samples, n_features)
            The input data to complete.

        Returns
        -------
        Xt : {ndarray, sparse matrix} of shape (n_samples, n_features)         or (n_samples, n_features_with_missing)
            The missing indicator for input data. The data type of `Xt`
            will be boolean.
        Fr   r   r  r  r   r   zSThe features {} have missing values in transform but have no missing values in fit.N)r   r  r   rS   r   r   r    r  r   r7   	setdiff1dr   rK   r2   r!   r  )rH   r#   r	  r   features_diff_fit_transs        r%   rT   zMissingIndicator.transform  s    	   	Y$$Qu$55AAAw'' YAGLC,?,? !WXXX!%!@!@!C!Ch=N**&(l8T^&L&L#  %<%AA%E%E $f%<==   ~"T%555+AAAt~,=>r'   c                 ~    |                      ||          }| j        j        | j        k     r|dd| j        f         }|S )a  Generate missing values indicator for `X`.

        Parameters
        ----------
        X : {array-like, sparse matrix} of shape (n_samples, n_features)
            The input data to complete.

        y : Ignored
            Not used, present for API consistency by convention.

        Returns
        -------
        Xt : {ndarray, sparse matrix} of shape (n_samples, n_features)         or (n_samples, n_features_with_missing)
            The missing indicator for input data. The data type of `Xt`
            will be boolean.
        N)rO   r   r2   r  )rH   r#   r   r	  s       r%   fit_transformzMissingIndicator.fit_transform1  sC    & yyA>!111'4>(9:Lr'   c                     t          | d           t          | |          }| j        j                                        t          j        fd|| j                 D             t                    S )r   r   c                     g | ]	} d | 
S )_r,   )r-   feature_nameprefixs     r%   r   z:MissingIndicator.get_feature_names_out.<locals>.<listcomp>c  s7         **L**  r'   r   )	r   r   rk   rl   lowerr7   asarrayr   r3   )rH   ra   r  s     @r%   r^   z&MissingIndicator.get_feature_names_outK  s    ( 	.///0~FF(..00z   $24>$B   
 
 
 	
r'   c                     t                                                      }d|j        _        d|j        _        d|j        _        g |j        _        |S r   )re   rf   rg   rh   stringr   transformer_tagspreserves_dtyperi   s     r%   rf   z!MissingIndicator.__sklearn_tags__j  sF    ww''))$(!!%!%02-r'   )NFr+   )rl   rm   rn   ro   r   r   rF   rp   rq   r7   r8   rI   r  r   rO   r
   r   rT   r  r^   rf   rr   rs   s   @r%   rM   rM     s        R Rj )=??+Z 7889jj&223"	$ $D    v) ) ) ) )4. 4. 4.l! ! !F'( '( '( '(R \555   65*' ' 'R \555   652
 
 
 
>        r'   rM   )*r   r   collectionsr   	functoolsr   typingr   numpyr7   numpy.mar   scipyr   rW   baser   r	   r
   utils._maskr   utils._missingr   r   utils._param_validationr   r   utils.fixesr   utils.sparsefuncsr   utils.validationr   r   r   r   r   r&   r?   rA   ru   rM   r,   r'   r%   <module>r0     s                                       @ @ @ @ @ @ @ @ @ @ # # # # # # 8 8 8 8 8 8 8 8 ? ? ? ? ? ? ? ?       + + + + + +             	
 	
 	
"5 "5 "5JI I I I I#] I I IXj	T j	T j	T j	T j	TL j	T j	T j	TZi i i i i' i i i i ir'   