
    0PhY                     h   d dl Z d dlZd dlZd dlZd dlZd dlZd dlZd dlmZ d dl	m
Z
 d dlmZ d dlmZmZ d dlmZ d dlmZ d dlmZ d dlZd dlZd d	lmZmZmZmZmZmZm Z m!Z!m"Z"m#Z#m$Z$m%Z% d d
l&m'Z'm(Z(m)Z)m*Z*m+Z+ d dl,m-Z- d dl.m/Z/ d dl0m1Z1  G d d          Z2d Z3 ej4        d          d             Z5 ej4        d          d             Z6ej4        d             Z7ej4        d             Z8ej9        :                    ddee2g          d             Z;d Z<d Z=d Z>d Z?ej9        :                    ddgddgf          d              Z@ej9        :                    d!d"d#d$g d%fd&d'd(g d)fd*d+d,d-d.gfg          d/             ZAd0 ZBej9        :                    d1d2i d3d4gfd5i d3gfd6d7d8id9d:gfg          d;             ZCd< ZDd= ZEd> ZFd? ZGej9        :                    d@edAdBdCdDdEgfe%dFdGdHdDg fe!dIdJdHdDdEgfe"dKdKdHdDdLdMgfedNdOddDg fedPdQd4dDg f eedRS          dTdUd4dDg fg          dV             ZHej9        :                    dWeejI        eJfeejI        ejI        feejI        eJfe!ejI        eJfe"ejI        ejI        fe%ejI        eJfg          dX             ZKdY ZLdZ ZMd[ ZNd\ ZOd] ZPd^ ZQd_ ZRd` ZSda ZTdb ZUdS )c    N)partial)	resources)Path)dumpsloadsMock)	HTTPError)urlparse)clear_data_home
fetch_fileget_data_homeload_breast_cancerload_diabetesload_digits
load_files	load_irisload_linnerudload_sample_imageload_sample_images	load_wine)RemoteFileMetadata$_derive_folder_and_filename_from_url_fetch_remoteload_csv_dataload_gzip_compressed_csv_datacheck_as_frame)scale)Bunchc                       e Zd ZdZd Zd ZdS )
_DummyPathz8Minimal class that implements the os.PathLike interface.c                     || _         d S Npath)selfr&   s     `/var/www/html/test/jupyter/venv/lib/python3.11/site-packages/sklearn/datasets/tests/test_base.py__init__z_DummyPath.__init__0   s    			    c                     | j         S r$   r%   )r'   s    r(   
__fspath__z_DummyPath.__fspath__3   s
    yr*   N)__name__
__module____qualname____doc__r)   r,    r*   r(   r"   r"   -   s8        BB      r*   r"   c                 p    t           j                            |           rt          j        |            d S d S r$   )osr&   isdirshutilrmtreer%   s    r(   _remove_dirr7   7   s8    	w}}T d r*   module)scopec              #   t   K   t          |                     d                    }|V  t          |           d S )Nscikit_learn_data_home_teststrmktempr7   tmpdir_factorytmp_files     r(   	data_homerB   <   s?      >(()FGGHHH
NNNr*   c              #   t   K   t          |                     d                    }|V  t          |           d S )Nscikit_learn_load_files_testr<   r?   s     r(   load_files_rootrE   C   s?      >(()GHHIIH
NNNr*   c              #      K   t          j        |           }t          j        |d          }|                    d           |                                 t          |          V  t          |           d S )NdirF)rH   delete   Hello World!
)tempfilemkdtempNamedTemporaryFilewritecloser=   r7   )rE   test_category_dir1sample_files      r(   test_category_dir_1rR   J   s      !)o>>>-2DUSSSK'(((
 
!
!!!!"#####r*   c              #   t   K   t          j        |           }t          |          V  t          |           d S )NrG   )rK   rL   r=   r7   )rE   test_category_dir2s     r(   test_category_dir_2rU   T   sD      !)o>>>
 
!
!!!!"#####r*   path_containerc                 p   |  | |          }t          |          }||k    sJ t          j                            |          sJ |  | |          }t	          |           t          j                            |          rJ t          |          }t          j                            |          sJ d S )N)rB   )r   r3   r&   existsr   )rV   rB   s     r(   test_data_homerY   [   s     !"N9--		222I	!!!!7>>)$$$$$ !"N9--	i((((w~~i((((( 	222I7>>)$$$$$$$r*   c                     t          |           }t          |j                  dk    sJ t          |j                  dk    sJ |j        J d S )Nr   )r   len	filenamestarget_namesDESCR)rE   ress     r(   test_default_empty_load_filesr`   o   sZ    
_
%
%Cs}""""s  A%%%%9r*   c                     t          |          }t          |j                  dk    sJ t          |j                  dk    sJ |j        J |j        dgk    sJ d S )N      rJ   )r   r[   r\   r]   r^   datarR   rU   rE   r_   s       r(   test_default_load_filesrf   v   sq    
_
%
%Cs}""""s  A%%%%98)*******r*   c                 h   t           j                            |                               t           j                                                  }t          |d|gd          }t          |j                  dk    sJ t          |j	                  dk    sJ |j
        dk    sJ |j        dgk    sJ d S )Ntestutf-8)description
categoriesencodingrb   zHello World!
)r3   r&   abspathsplitseppopr   r[   r\   r]   r^   rd   )rR   rU   rE   categoryr_   s        r(   .test_load_files_w_categories_desc_and_encodingrr   ~   s     w23399"&AAEEGGH
V
W  C s}""""s  A%%%%98()))))))r*   c                     t          |d          }t          |j                  dk    sJ t          |j                  dk    sJ |j        J |                    d          J d S )NF)load_contentrb   rc   rd   )r   r[   r\   r]   r^   getre   s       r(   test_load_files_wo_load_contentrv      sw     _5
9
9
9Cs}""""s  A%%%%9776??"""""r*   allowed_extensionsz.txtz.jsonc                    | dz                                    d}fd|D             }|D ]}|                    d           t          |           }t          fd|D                       t          |j                  k    sJ dS )z;Check the behaviour of `allowed_extension` in `load_files`.sub)z	file1.txtz
file2.jsonz
file3.jsonzfile4.mdc                     g | ]}|z  S r1   r1   ).0fds     r(   
<listcomp>z6test_load_files_allowed_extensions.<locals>.<listcomp>   s    """qQU"""r*   s   hello)rw   c                 @    g | ]}|j         v t          |          S r1   )suffixr=   )r{   prw   s     r(   r~   z6test_load_files_allowed_extensions.<locals>.<listcomp>   s,    HHH15G)G)GA)G)G)Gr*   N)mkdirwrite_bytesr   setr\   )tmp_pathrw   filespathsr   r_   r}   s    `    @r(   "test_load_files_allowed_extensionsr      s     	5AGGIIIAE""""E"""E    	h
X2D
E
E
ECHHHHHHHIISN N      r*   zHfilename, expected_n_samples, expected_n_features, expected_target_nameszwine_data.csv      )class_0class_1class_2iris.csv      )setosa
versicolor	virginicazbreast_cancer.csv9     	malignantbenignc                     t          |           \  }}}|j        d         |k    sJ |j        d         |k    sJ |j        d         |k    sJ t          j                            ||           d S )Nr   rb   )r   shapenptestingassert_array_equal)filenameexpected_n_samplesexpected_n_featuresexpected_target_namesactual_dataactual_targetactual_target_namess          r(   test_load_csv_datar      s     7DH6M6M3K 3Q#55555Q#66666q!%77777J!!"57LMMMMMr*   c                     d} d}t          |           }t          | |          }t          |          dk    sJ t          |          dk    sJ t          j                            |d         |d                    t          j                            |d         |d                    t          j                            |d	         |d	                    |d
                             d          sJ d S )Nr   ziris.rstdata_file_namer   descr_file_namer      r   rb   rc   z.. _iris_dataset:)r   r[   r   r   r   
startswith)r   r   res_without_descrres_with_descrs       r(   test_load_csv_data_with_descrr      s    N O%^DDD"%  N ~!#### !!Q&&&&J!!."35Fq5IJJJJ!!."35Fq5IJJJJ!!."35Fq5IJJJ"(()<=======r*   z filename, kwargs, expected_shapezdiabetes_data_raw.csv.gz  
   diabetes_target.csv.gzzdigits.csv.gz	delimiter,  A   c                 T    t          | fi |}|j        t          |          k    sJ d S r$   )r   r   tuple)r   kwargsexpected_shaper   s       r(   "test_load_gzip_compressed_csv_datar      s;     0CCFCCKn 5 5555555r*   c                      d} d}t          |           }t          | |          \  }}t          j                            ||           |                    d          sJ d S )Nr   zdiabetes.rstr   r   z.. _diabetes_dataset:)r   r   r   r   r   )r   r   expected_datar   descrs        r(   -test_load_gzip_compressed_csv_data_with_descrr      sv    -N$O1PPPM6%'  K
 J!!+}===34444444r*   c                  "   	 t                      } t          | j                  dk    sJ t          | j                  dk    sJ | j        }t	          j        |d         ddd d f         t	          j        g dt          j                  k              sJ t	          j        |d         ddd d f         t	          j        g dt          j                  k              sJ | j        sJ d S # t          $ r t          j        d           Y d S w xY w)Nrc   r   )         )dtyperb   )rc      r   3Could not load sample images, PIL is not available.)r   r[   imagesr\   r   allarrayuint8r^   ImportErrorwarningswarn)r_   r   s     r(   test_load_sample_imagesr      s#   M ""3:!####3=!!Q&&&& vfQi1aaa(BH___BH,U,U,UUVVVVVvfQi1aaa(BH[[[,Q,Q,QQRRRRRy M M MKLLLLLLMs   C(C, ,DDc                      	 t          d          } | j        dk    sJ | j        dk    sJ d S # t          $ r t	          j        d           Y d S w xY w)Nz	china.jpgr   )i  i  r   r   )r   r   r   r   r   r   )chinas    r(   test_load_sample_imager      sz    M!+..{g%%%%{m++++++ M M MKLLLLLLMs   )- AAc                  P   t          d          } | j        j        dk    sJ | j        j        s
J d            t          | j                  dk    sJ | j        sJ t                      }t          j	        
                    t          | j                  dz  |j        d           d	S )
zTest to check that we load a scaled version by default but that we can
    get an unscaled version when setting `scaled=False`.F)scaledr   r   r   r   gT5@g-C6?)atolN)r   rd   r   targetsizer[   feature_namesr^   r   r   assert_allcloser   )diabetes_rawdiabetes_defaults     r(   test_load_diabetes_rawr      s     !...L"i////#((S(((|)**b0000$Jl  H-/?/D5      r*   zEloader_func, data_shape, target_shape, n_target, has_descr, filenames)r   r   )r   rc   Tr   )r   r   )r   r   )r   r   )r   )   r   data_filenametarget_filenamer   )r   )r   @   )r   	   )n_class)Q  r   )r   c                     |             t          t                    sJ j        j        |k    sJ j        j        |k    sJ t          d          r t          j                  |d         k    sJ |t          j                  |k    sJ |r	j	        sJ |r#dv sJ t          fd|D                       sJ d S d S )Nr   rb   data_modulec                     g | ];}|v o4t          j        d                    |         z                                  <S )r   )r   r   is_file)r{   r|   bunchs     r(   r~   ztest_loader.<locals>.<listcomp>/  s\         U
 Q_U=%9::U1XENNPP  r*   )
isinstancer    rd   r   r   hasattrr[   r   r]   r^   r   )loader_func
data_shapetarget_shapen_target	has_descrr\   r   s         @r(   test_loaderr     s-   ( KMMEeU#####:z))))<----uo&& 95&'':a=88885%&&(2222 { 
%%%%    #  
 
 	
 	
 	

 
	
 	
r*   z%loader_func, data_dtype, target_dtypec                 @     |             }t          || ||           d S )N)expected_data_dtypeexpected_target_dtyper   )r   
data_dtypetarget_dtypedefault_results       r(   test_toy_dataset_frame_dtyper   7  s<     ![]]N&*	     r*   c                      t          d          } t          t          |                     }d|_        |d         |j        k    sJ d S )Nx)r   y)r    r   r   r   r   bunch_from_pkls     r(   test_loads_dumps_bunchr   L  sI    CLLLE5<<((NN#."2222222r*   c                      t          d          } d| j        d<   t          t          |                     }|j        dk    sJ |d         dk    sJ d|_        |j        dk    sJ |d         dk    sJ d S )Noriginal)keyzset from __dict__r   changed)r    __dict__r   r   r   r   s     r(   8test_bunch_pickle_generated_with_0_16_and_read_with_0_17r  S  s    j!!!E 0EN55<<((N++++% J.... #N****% I------r*   c                  H    t                      } dt          |           v sJ d S )Nrd   )r   rH   )rd   s    r(   test_bunch_dirr  h  s(    ;;DSYYr*   c                      d} t          j        t          |           5  ddlm} ddd           n# 1 swxY w Y   d} t          j        t          |           5  ddlm} ddd           dS # 1 swxY w Y   dS )zLCheck that we raise the ethical warning when trying to import `load_boston`.z8The Boston housing prices dataset has an ethical problemmatchr   )load_bostonNzBcannot import name 'non_existing_function' from 'sklearn.datasets')non_existing_function)pytestraisesr   sklearn.datasetsr	  r
  )msgr	  r
  s      r(   test_load_boston_errorr  n  s   
DC	{#	.	.	. 1 10000001 1 1 1 1 1 1 1 1 1 1 1 1 1 1 OC	{#	.	.	. ; ;::::::; ; ; ; ; ; ; ; ; ; ; ; ; ; ; ; ; ;s   155A--A14A1c           
      :   d}t          d|d          }t          t          |dddt          j                                        }|                     d|           t          j        t          d	
          5 }t          j	        t          d
          5  t          |dd           ddd           n# 1 swxY w Y   |j        dk    sJ |D ]}t          |j                  d| k    sJ  t          |          dk    sJ 	 ddd           dS # 1 swxY w Y   dS )z'Check retry mechanism in _fetch_remote.z8https://scikit-learn.org/this_file_does_not_exist.tar.gzinvalid_fileN  	Not Found)urlcoder  hdrsfpside_effect"sklearn.datasets._base.urlretrievezRetry downloadingr  zHTTP Error 404r   r   )	n_retriesdelayr   zRetry downloading from url: )r   r	   r
   ioBytesIOsetattrr  warnsUserWarningr  r   
call_countr=   messager[   )monkeypatchr  invalid_remote_fileurlretrieve_mockrecordrs         r(   1test_fetch_remote_raise_warnings_with_invalid_urlr)  z  s    EC,^S$GG#;Tbjll
 
 
  
 <>NOOO	k)<	=	=	=  ]9,<=== 	E 	E-!DDDD	E 	E 	E 	E 	E 	E 	E 	E 	E 	E 	E 	E 	E 	E 	E  *a//// 	J 	JAqy>>%IC%I%IIIIII6{{a                                   s7   8DB3'D3B7	7D:B7	;ADDDc                     t          d          \  } }| dk    sJ |dk    sJ t          d          \  } }| dk    sJ |dk    sJ t          d          \  } }| dk    sJ |dk    sJ t          d          \  } }| dk    sJ |d	k    sJ t          d
          \  } }| dk    sJ |d	k    sJ t          d          \  } }| dk    sJ |dk    sJ t          d          \  } }| dk    sJ |dk    sJ t          d          \  } }| dk    sJ |dk    sJ t          d          \  } }| dk    sJ |dk    sJ t          d          \  } }| dk    sJ |dk    sJ t          d          \  } }| dk    sJ |dk    sJ t          d          \  } }| dk    sJ |d	k    sJ t          j        t          d          5  t          d           d d d            d S # 1 swxY w Y   d S )Nzhttps://example.com/file.tar.gzexample.comzfile.tar.gzu2   https://example.com/نمونه نماینده.datau   نمونه-نماینده.dataz)https://example.com/path/to-/.file.tar.gzzexample.com/path_tozhttps://example.com/downloaded_filezhttps://example.comz2https://example.com/path/@to/data.json?param=valuez	data.jsonz4https://example.com/path/@@to._/-_.data.json.#anchorz"https://example.com//some_file.txtzsome_file.txtzhttp://example/../some_file.txtexamplez'https://example.com/!.'.,/some_file.txtz+https://example.com/a/!.'.,/b/some_file.txtzexample.com/a_bzhttps://example.com/!.'.,zInvalid URLr  z
https:/../)r   r  r  
ValueError)folderr   s     r(   (test_derive_folder_and_filename_from_urlr0    sC   ;) FH ]""""}$$$$;< FH ]""""77777;3 FH *****}$$$$;<RSSFH]""""(((((;<QRRFH]""""(((((;< FH *****{"""";> FH *****{"""";, FH ]""""&&&&;) FH Y&&&&;1 FH ]""""&&&&;5 FH &&&&&&&&&;<WXXFH]""""(((((	z	7	7	7 ; ;,\:::; ; ; ; ; ; ; ; ; ; ; ; ; ; ; ; ; ;s   4GGGc                 .      fd}t          |          S )Nc                     t                    }t          |           j                            d          }||z                                  st          | ddd d           t          j        ||z  |           d S )N/r  r  )r   r   r&   striprX   r
   r5   copy)r  
local_pathserver_root	file_pathserver_sides       r(   _urlretrieve_mockz,_mock_urlretrieve.<locals>._urlretrieve_mock  sx    ;''SMM&,,S11	i'//11 	?Ck4>>>K)+Z88888r*   r  r   )r9  r:  s   ` r(   _mock_urlretriever;    s0    9 9 9 9 9 -....r*   c                 <   t          |          }|dz  }|                                 |dz  }d}|                    |d           |dz  }|                                 |dz  }|                    dd           |d	z  }|                                 t          |          }|                     d
|           |                     dt          |                     t          d          }	|	|dz  dz  k    sJ |	                    d          |k    sJ t          d          }	|	|dz  dz  dz  k    sJ |	                    d          |                    d          k    sJ t          j	        d          }
t          j        t                    5  t          j        |
          5  t          dd           d d d            n# 1 swxY w Y   d d d            n# 1 swxY w Y   |dz  dz  }t          |                                          |dz  gk    sJ d S )Nr9  
data.jsonl{"a": 1, "b": 2}
ri   rl   	subfolderzother_file.txtzSome important text data.rB   r  z$sklearn.datasets._base.get_data_home)return_valuehttps://example.com/data.jsonlr+  z,https://example.com/subfolder/other_file.txtzERetry downloading from url: https://example.com/subfolder/invalid.txtr  z)https://example.com/subfolder/invalid.txtr   )r  )r   r   
write_textr;  r  r	   r   	read_textreescaper  r  r
   r   sortediterdir)r$  tmpdirr9  	data_fileserver_dataserver_subfolderother_data_filerB   r&  fetched_file_pathexpected_warning_msglocal_subfolders               r(   test_fetch_file_using_data_homerQ    s   &\\F=(Kl*I&Kw777"[0&)99O:WMMM$IOO(55<>NOOO.)0L0L0L   #(  	M 9L HHHHH&&&88KGGGG"6  	Y6DGWWWWWW&&&88O<U<U= =     9O  
y	!	!  \ 4555 	 	;   	 	 	 	 	 	 	 	 	 	 	 	 	 	 	                -/+=O/))++,,CS1S0TTTTTTTs6   G)G;GG	GG	GG"%G"c                    |                     d          }t          |dz            }d}|                    |d           |                     d          }t          |          }|                     d|           t          d|	          }||dz  k    sJ |                    d          |k    sJ |j        d
k    sJ t          d|	          }||dz  k    sJ |                    d          |k    sJ |j        d
k    sJ |                                 t          d|	          }||dz  k    sJ |                    d          |k    sJ |j        dk    sJ d S )Nr9  r=  r>  ri   r?  client_sider  rB  r/  rb   rc   )	r   r   rC  r;  r  r   rD  r"  unlink)r$  rI  r9  rJ  rK  rS  r&  rN  s           r(   test_fetch_file_without_sha256rV    s   ,,}--K[</00I&Kw777,,}--K(55<>NOOO #(   l :::::&&&88KGGGG&!++++ #(   l :::::&&&88KGGGG&!++++ "(   l :::::&&&88KGGGG&!++++++r*   c                    |                     d          }t          |dz            }d}|                    |d           t          j        |                                                                          }|                     d          }t          |          }|                     d|           t          d||	          }||dz  k    sJ |
                    d          |k    sJ |j        d
k    sJ t          d||	          }||dz  k    sJ |
                    d          |k    sJ |j        d
k    sJ |                    dd           d| d}	t          j        |	          5  t          d||	          }||dz  k    sJ |
                    d          |k    sJ |j        dk    sJ 	 d d d            n# 1 swxY w Y   t          d||	          }||dz  k    sJ |
                    d          |k    sJ |j        dk    sJ |                                 t          d||	          }||dz  k    sJ |
                    d          |k    sJ |j        dk    sJ t          d|          }||dz  k    sJ |
                    d          |k    sJ |j        dk    sJ d}
d}t          j        d| d|
 d          }t          j        t$          |          5  t          j        |          5  t          d||
	           d d d            n# 1 swxY w Y   d d d            d S # 1 swxY w Y   d S )Nr9  r=  r>  ri   r?  rS  r  rB  )r/  sha256rb   zcorrupted contentszQSHA256 checksum of existing local file data.jsonl \(.*\) differs from expected \(z9\): re-downloading from https://example.com/data.jsonl \.r  rc   r   rT  deadbabecafebeefzdiffers from expectedz#The SHA256 checksum of data.jsonl (z) differs from expected (z).)r   r   rC  hashlibrX  
read_bytes	hexdigestr;  r  r   rD  r"  r  r   rU  rE  rF  r  OSError)r$  rI  r9  rJ  rK  expected_sha256rS  r&  rN  expected_msgnon_matching_sha256rO  expected_error_msgs                r(   test_fetch_file_with_sha256rb  ?  s)   ,,}--K[</00I&Kw777nY%9%9%;%;<<FFHHO,,}--K(55<>NOOO #(_   l :::::&&&88KGGGG&!++++ #(_   l :::::&&&88KGGGG&!++++   !5 HHH	A+:	A 	A 	A 
 
L	)	)	) 0 0&,[
 
 
 !K,$>>>>> **G*<<KKKK*a/////0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 #(_   l :::::&&&88KGGGG&!++++ "(_   l :::::&&&88KGGGG&!++++ #(   l :::::&&&88KGGGG&!++++ -2	-o 	- 	-(	- 	- 	-  
w&8	9	9	9  \ 4555 	 	0"*   	 	 	 	 	 	 	 	 	 	 	 	 	 	 	                 sI   1AGG	G	/L<L$L<$L(	(L<+L(	,L<<M M )VrZ  r  r3   rE  r5   rK   r   	functoolsr   	importlibr   pathlibr   pickler   r   unittest.mockr	   urllib.errorr
   urllib.parser   numpyr   r  r  r   r   r   r   r   r   r   r   r   r   r   r   sklearn.datasets._baser   r   r   r   r   "sklearn.datasets.tests.test_commonr   sklearn.preprocessingr   sklearn.utilsr    r"   r7   fixturerB   rE   rR   rU   markparametrizerY   r`   rf   rr   rv   r   r   r   r   r   r   r   r   r   float64intr   r   r  r  r  r)  r0  r;  rQ  rV  rb  r1   r*   r(   <module>rt     s    				 				 				                                    " " " " " " ! ! ! ! ! !                                              > = = = = = ' ' ' ' ' '               
 h    h    $ $ $ $ $ $ )D$
+CDD% % ED%&  + + +* * *# # # -67:K/LMM  NM N	#r#D#D#DE	S!BBBC	c2X'>? N N N> > >$ &	#R#r3	!2u-	;,tRj9 6 6 6
5 5 5M M M M M M    K	Y4*F	Ivq$3	Hfa
|</0	
 
	64r:	j'2tR8	a	(	(	(*gr4L &
 
' &
. +	RZ-	
BJ/	bj#&	BJ$	
BJ/	BJ$
 
 
 
3 3 3. . .*  	; 	; 	;     .D; D; D;N/ / /1U 1U 1Uh%, %, %,PS S S S Sr*   