
    0Ph6                        d Z ddlZddlZddlZddlmZ ddlmZmZ ddl	m
Z
mZ ddlZddlZddlmZmZ ddlmZ dd	lmZmZmZ d
dlmZ d
dlmZmZmZmZ  eddd          Z eddd          Z  ej!        e"          Z# e eh d          dge$ej%        dgdgdgdgdgdgdg eed
dd          g eeddd          gd
d          dddddddddd d
d!            Z&	 d$d"Z'd# Z(dS )%zKDDCUP 99 dataset.

A classic dataset for anomaly detection.

The dataset page is available from UCI Machine Learning Repository

https://archive.ics.uci.edu/ml/machine-learning-databases/kddcup99-mld/kddcup.data.gz

    N)GzipFile)IntegralReal)existsjoin   )Bunchcheck_random_state)shuffle)Interval
StrOptionsvalidate_params   )get_data_home)RemoteFileMetadata_convert_data_dataframe_fetch_remote
load_descrkddcup99_dataz.https://ndownloader.figshare.com/files/5976045@3b6c942aa0356c0ca35b7b595a26c89d343652c9db428893e7494f837b274292)filenameurlchecksumkddcup99_10_dataz.https://ndownloader.figshare.com/files/5976042@8045aca0d84e70e622d1148d7df782496f6333bf6eb979a1b0837c42a9fd9561>   SASFhttpsmtpbooleanrandom_stateleft)closedg        neither)
subset	data_homer   r!   	percent10download_if_missing
return_X_yas_frame	n_retriesdelayT)prefer_skip_nested_validationF         ?c        
            t          |          }t          |||||	          }
|
j        }|
j        }|
j        }|
j        }| dk    r|dk    }t          j        |          }||ddf         }||         }||ddf         }||         }|j        d         }t          |          }|
                    d|d          }||         }||         }t          j        ||f         }t          j        ||f         }| dk    s| d	k    s| d
k    r]|dddf         dk    }t          j        ||ddf         ||ddf         f         }|dd         |dd         z   }||         }t          j        |dddf         dz                       t          d                    |dddf<   t          j        |dddf         dz                       t          d                    |dddf<   t          j        |dddf         dz                       t          d                    |dddf<   | d	k    rj|dddf         dk    }||         }||         }t          j        |dddf         |dddf         |dddf         f         }|d         |d         |d         g}| d
k    rj|dddf         dk    }||         }||         }t          j        |dddf         |dddf         |dddf         f         }|d         |d         |d         g}| dk    r\t          j        |dddf         |dddf         |dddf         |dddf         f         }|d         |d         |d         |d         g}|rt!          |||          \  }}t#          d          }d}|rt%          d||||          \  }}}|r||fS t'          ||||||          S )a  Load the kddcup99 dataset (classification).

    Download it if necessary.

    =================   ====================================
    Classes                                               23
    Samples total                                    4898431
    Dimensionality                                        41
    Features            discrete (int) or continuous (float)
    =================   ====================================

    Read more in the :ref:`User Guide <kddcup99_dataset>`.

    .. versionadded:: 0.18

    Parameters
    ----------
    subset : {'SA', 'SF', 'http', 'smtp'}, default=None
        To return the corresponding classical subsets of kddcup 99.
        If None, return the entire kddcup 99 dataset.

    data_home : str or path-like, default=None
        Specify another download and cache folder for the datasets. By default
        all scikit-learn data is stored in '~/scikit_learn_data' subfolders.

        .. versionadded:: 0.19

    shuffle : bool, default=False
        Whether to shuffle dataset.

    random_state : int, RandomState instance or None, default=None
        Determines random number generation for dataset shuffling and for
        selection of abnormal samples if `subset='SA'`. Pass an int for
        reproducible output across multiple function calls.
        See :term:`Glossary <random_state>`.

    percent10 : bool, default=True
        Whether to load only 10 percent of the data.

    download_if_missing : bool, default=True
        If False, raise an OSError if the data is not locally available
        instead of trying to download the data from the source site.

    return_X_y : bool, default=False
        If True, returns ``(data, target)`` instead of a Bunch object. See
        below for more information about the `data` and `target` object.

        .. versionadded:: 0.20

    as_frame : bool, default=False
        If `True`, returns a pandas Dataframe for the ``data`` and ``target``
        objects in the `Bunch` returned object; `Bunch` return object will also
        have a ``frame`` member.

        .. versionadded:: 0.24

    n_retries : int, default=3
        Number of retries when HTTP errors are encountered.

        .. versionadded:: 1.5

    delay : float, default=1.0
        Number of seconds between retries.

        .. versionadded:: 1.5

    Returns
    -------
    data : :class:`~sklearn.utils.Bunch`
        Dictionary-like object, with the following attributes.

        data : {ndarray, dataframe} of shape (494021, 41)
            The data matrix to learn. If `as_frame=True`, `data` will be a
            pandas DataFrame.
        target : {ndarray, series} of shape (494021,)
            The regression target for each sample. If `as_frame=True`, `target`
            will be a pandas Series.
        frame : dataframe of shape (494021, 42)
            Only present when `as_frame=True`. Contains `data` and `target`.
        DESCR : str
            The full description of the dataset.
        feature_names : list
            The names of the dataset columns
        target_names: list
            The names of the target columns

    (data, target) : tuple if ``return_X_y`` is True
        A tuple of two ndarray. The first containing a 2D array of
        shape (n_samples, n_features) with each row representing one
        sample and each column representing the features. The second
        ndarray of shape (n_samples,) containing the target samples.

        .. versionadded:: 0.20
    r&   )r&   r'   r(   r+   r,   r   s   normal.Nr   i1  r   r   r      r      g?F)copy      r   s   https   smtp)r!   zkddcup99.rstfetch_kddcup99)datatargetframetarget_namesfeature_namesDESCR)r   _fetch_brute_kddcup99r8   r9   r<   r;   nplogical_notshaper
   randintr_c_logastypefloatshuffle_methodr   r   r	   )r%   r&   r   r!   r'   r(   r)   r*   r+   r,   kddcup99r8   r9   r<   r;   stnormal_samplesnormal_targetsabnormal_samplesabnormal_targetsn_samples_abnormalrfdescrr:   s                            Z/var/www/html/test/jupyter/venv/lib/python3.11/site-packages/sklearn/datasets/_kddcup99.pyr7   r7   6   su   t 	222I$/  H =D_F*M(L~~j N1ad111:!!9-3A6),77  $6==+A.+A.u^%556~'778~~6))Vv-=-=BK1uT!SbS&\4233</0%crc*]233-??VT!!!Q$Z#-55e%5HHIIQQQT
VT!!!Q$Z#-55e%5HHIIQQQT
VT!!!Q$Z#-55e%5HHIIQQQT
VQQQT
g%A7DAYF5aaadT!!!Q$Zaaad;<D*1-}Q/?qAQRMVQQQT
g%A7DAYF5aaadT!!!Q$Zaaad;<D*1-}Q/?qAQRMT>>5aaadT!!!Q$ZaaadT!!!Q$ZGHDa a a a 	M  O%dFNNNf''FE 
5dFM<
 
tV  V|!#       c                    t          |           } d}|rt          | d|z             }t          }nt          | d|z             }t          }t          |d          }t          |d          }	t	          |          }
g dt
          fdd	d
dt
          fdt
          fdt
          fdt
          fdt
          fdt
          fdt
          fdt
          fdt
          fdt
          fdt
          fdt
          fdt
          fdt
          fdt
          fdt
          fdt
          fdt
          fdt
          fdt
          fdt          fd t          fd!t          fd"t          fd#t          fd$t          fd%t          fd&t
          fd't
          fd(t          fd)t          fd*t          fd+t          fd,t          fd-t          fd.t          fd/t          fd0}d1 |D             }|d2         }|d3d2         }|
r^	 t          j        |          }t          j        |	          }n*# t          $ r&}t          d4t          |           d5          |d3}~ww xY w|rt          |           t                              d6|j        z             t!          ||||7           t#          j        |          }t                              d8           t          ||j                  }t+          |d9:          }g }|                                D ]R}|                                }|                    |                    d;d<                              d=                     S|                                 t                              d>           t9          j        |           t#          j        |t>          ?          }tA          d@          D ].}|d3d3|f         !                    ||                   |d3d3|f<   /|d3d3d3d2f         }|d3d3d2f         }t          j"        ||dAB           t          j"        ||	dAB           nt          dC          tG          ||||gD          S )Ea5  Load the kddcup99 dataset, downloading it if necessary.

    Parameters
    ----------
    data_home : str, default=None
        Specify another download and cache folder for the datasets. By default
        all scikit-learn data is stored in '~/scikit_learn_data' subfolders.

    download_if_missing : bool, default=True
        If False, raise an OSError if the data is not locally available
        instead of trying to download the data from the source site.

    percent10 : bool, default=True
        Whether to load only 10 percent of the data.

    n_retries : int, default=3
        Number of retries when HTTP errors are encountered.

    delay : float, default=1.0
        Number of seconds between retries.

    Returns
    -------
    dataset : :class:`~sklearn.utils.Bunch`
        Dictionary-like object, with the following attributes.

        data : ndarray of shape (494021, 41)
            Each row corresponds to the 41 features in the dataset.
        target : ndarray of shape (494021,)
            Each value corresponds to one of the 21 attack types or to the
            label 'normal.'.
        feature_names : list
            The names of the dataset columns
        target_names: list
            The names of the target columns
        DESCR : str
            Description of the kddcup99 dataset.

    r1   z-py3kddcup99_10rI   samplestargetsduration)protocol_typeS4)serviceS11)flagS6	src_bytes	dst_byteslandwrong_fragmenturgenthotnum_failed_logins	logged_innum_compromised
root_shellsu_attemptednum_rootnum_file_creations
num_shellsnum_access_filesnum_outbound_cmdsis_host_loginis_guest_logincount	srv_countserror_ratesrv_serror_ratererror_ratesrv_rerror_ratesame_srv_ratediff_srv_ratesrv_diff_host_ratedst_host_countdst_host_srv_countdst_host_same_srv_ratedst_host_diff_srv_ratedst_host_same_src_port_ratedst_host_srv_diff_host_ratedst_host_serror_ratedst_host_srv_serror_ratedst_host_rerror_ratedst_host_srv_rerror_rate)labelsS16c                     g | ]
}|d          S )r    ).0cs     rS   
<listcomp>z)_fetch_brute_kddcup99.<locals>.<listcomp>p  s    %%%QAaD%%%rT   Nz7The cache for fetch_kddcup99 is invalid, please delete z! and run the fetch_kddcup99 againzDownloading %s)dirnamer+   r,   zextracting archiverQ   )r   mode
 ,zextraction done)dtype*   r   )compressz1Data not found and `download_if_missing` is False)r8   r9   r<   r;   )$r   r   ARCHIVE_10_PERCENTARCHIVEr   intrG   joblibload	ExceptionOSErrorstr_mkdirploggerinfor   r   r?   r   debugr   r   	readlinesdecodeappendreplacesplitcloseosremoveasarrayobjectrangerF   dumpr	   )r&   r(   r'   r+   r,   
dir_suffix
kddcup_dirarchivesamples_pathtargets_path	availabledtcolumn_namesr;   r<   XyeDTarchive_pathfile_Xylinejs                           rS   r>   r>   
  s   V 	222IJ )]Z%?@@
$)Z*%<==

I..L
I..L|$$I+
	S+
+
 	+
 		+

 
c+
 
c+
 
+
 
3+
 
3+
 
+
 
c"+
 
c+
 
C +
 
s+
 
+
  
S!+
" 
s##+
$ 
s%+
& 
S!'+
( 
c")+
* 
#++
, 
3-+
. 
#/+
0 
c1+
2 
3+
4 
E"5+
6 
7+
8 
E"9+
: 
% ;+
< 
% =+
> 
u%?+
@ 
3A+
B 
s#C+
D 
"5)E+
F 
"5)G+
H 
'.I+
J 
'.K+
L 
 'M+
N 
$U+O+
P 
 'Q+
R 
$U+S+
T 	U+
BZ &%"%%%L#L "%M 'K	L))AL))AA 	 	 	Fz??F F F  	 
 K
$w{2333gzYeTTTTXb\\)***J(899,S999OO%% 	9 	9D;;==DIIdll4,,223778888&'''
	,Z&)))r 	. 	.A!!!Q$xr!u--Bqqq!tHHqqq#2#vJqqq"uI
 	A|a0000A|a00000IJJJ#"^	   s   :(H$ $
I.!IIc                     	 t          j        |            dS # t          $ r!}|j        t          j        k    r Y d}~dS d}~ww xY w)zgEnsure directory d exists (like mkdir -p on Unix)
    No guarantee that the directory is writable.
    N)r   makedirsr   errnoEEXIST)dr   s     rS   r   r     s]    
A   7el"" #"""""s    
A>A)NTTr.   r/   ))__doc__r   loggingr   gzipr   numbersr   r   os.pathr   r   r   numpyr?   utilsr	   r
   r   rH   utils._param_validationr   r   r   r   r   _baser   r   r   r   r   r   	getLogger__name__r   r   PathLiker7   r>   r   r   rT   rS   <module>r      s      				       " " " " " " " "                      - - - - - - - - - - - - - - K K K K K K K K K K                  
8O   ('8O    
	8	$	$ ::::;;TB2;-;'([ ){ kKhxD@@@A(4d9===>  #'  " 
B B B B BL RUX X X Xv    rT   