
    0Ph                         d Z ddlmZ ddlmZ ddlZddlZddlm	Z
 ddlmZmZmZ ddlmZ ddlmZ d Zd	 Zd
 Zd Zd Zd Zd ZdS )zTest the 20news downloader, if the data is available,
or if specifically requested via environment variable
(e.g. for CI jobs).    )partial)patchN)check_as_framecheck_pandas_dependency_messagecheck_return_X_y	normalize)assert_allclose_dense_sparsec                 n    | dd          }|j                             d          sJ  | d|j        ddd         d          }|j        |j        dd          k    sJ t          j        |j                                                  d	d
gk    sJ t          |j                  t          |j                  k    sJ t          |j                  t          |j	                  k    sJ |j	        d	         }|j        |j        d	                  }|j        
                    |          }|j	        t          j        |j        |k              d	         d	                  }||k    sJ  | ddd          \  }}t          |          t          |j	                  k    sJ |j        |j        j        k    sJ d S )NallF)subsetshuffle.. _20newsgroups_dataset:)r   
categoriesr   r      T)r   r   
return_X_y)DESCR
startswithtarget_namesnpuniquetargettolistlen	filenamesdataindexwhereshape)	fetch_20newsgroups_fxtr   	data2catsentry1categorylabelentry2Xys	            b/var/www/html/test/jupyter/venv/lib/python3.11/site-packages/sklearn/datasets/tests/test_20news.pytest_20newsr,      s   !!>>>D:  !<===== '&!22b8!<e  I
 !T%6rss%;;;;;9Y%&&--//Aq69999 y"##s9+;'<'<<<<<y"##s9>':'::::: ^AF%i&6q&9:H##H--EYrxu 455a8;<FV "!$OOODAqq66S^^####7dk'''''''    c                 ,    | d          }t          |d                   t          |j                  k    sJ t          |d                   t          |j                  k    sJ t          |d                   t          |j                  k    sJ dS )zuChecks the length consistencies within the bunch

    This is a non-regression test for a bug present in 0.16.1.
    r   r   r   r   r   N)r   r   r   r   )r#   r   s     r+   test_20news_length_consistencyr0   5   s     "!///DtF|DI....tH~#dk"2"22222tK !!S%8%8888888r-   c                     | d          }t          j        |j                  r|j        j        dk    sJ |j        j        dk    sJ |j        j        d         dk    sJ |j        j        t          j        k    sJ |j	        
                    d          sJ  | d          }t          j        |j                  r|j        j        dk    sJ |j        j        d	k    sJ |j        j        d         d
k    sJ |j        j        t          j        k    sJ |j	        
                    d          sJ t          | d          }t          ||            | d          }t          j        |j                  r|j        j        dk    sJ |j        j        dk    sJ |j        j        d         dk    sJ |j        j        t          j        k    sJ |j	        
                    d          sJ d S )Ntrainr/   csr)2,  ; r   r4   r   test)l  r5   r7   r   )I  r5   r8   )spissparser   formatr"   r   dtyper   float64r   r   r   r   )!fetch_20newsgroups_vectorized_fxtbunch
fetch_funcs      r+   test_20news_vectorizedrA   A   s   --W===E;uz""Auz'8E'A'A'A'A:....<a E)))):rz))));!!"=>>>>> .-V<<<E;uz""Auz'8E'A'A'A'A:~----<a D((((:rz))));!!"=>>>>> :6JJJJUJ''' .-U;;;E;uz""Auz'8E'A'A'A'A:55555<a L0000:rz))));!!"=>>>>>>>r-   c                 @    | d          } | d          }|d         d d         }|d         d d         }t          |t          |                     t          j        t          j                            |                                d          d          sJ d S )NFr   Tr   d   r   )axis)r
   r	   r   allcloselinalgnormtodense)r>   r)   X_X_norms       r+   test_20news_normalizationrK   _   s    ))E:::A	*	*T	:	:	:BZF	&	$3$A 1666;ry~~fnn&6&6Q~??CCCCCCCr-   c                 l   t          j        d           | d          }t          ||            |j        }|j        dk    sJ t          fd|j        j        D                       sJ dD ]}||                                v sJ d|                                v sJ |j	        j
        dk    sJ d S )NpandasTas_frame)r4   i< c                 :    g | ]}t          |j                  S  )
isinstanceSparseDtype).0colpds     r+   
<listcomp>z(test_20news_as_frame.<locals>.<listcomp>q   s%    MMMC
3//MMMr-   )beginner	beginners	beginning
beginningsbeginsbegleybegonecategory_class)pytestimportorskipr   framer"   r   r   dtypeskeysr   name)r>   r?   rb   expected_featurerV   s       @r+   test_20news_as_framerg   i   s    		X	&	&B--t<<<E5;<<<KE;/))))MMMM5:;LMMMNNNNN 	0 	0  5::<</////uzz||++++< 0000000r-   c                 $    t          |            d S )N)r   )r>   hide_available_pandass     r+   test_as_frame_no_pandasrj      s    #$EFFFFFr-   c                 B   t          d          5 }t          d          5 }d|_        d|_        d}t          j        t          |          5   | d           d d d            n# 1 swxY w Y   d d d            n# 1 swxY w Y   d d d            d S # 1 swxY w Y   d S )Nzos.path.existszjoblib.loadT)r)   r*   zThe cached dataset located in)matchrN   )r   return_valuer`   raises
ValueError)r>   mock_is_exist	mock_loaderr_msgs       r+   test_outdated_picklers      s   		 	  AM=!! 	AY)-M&%/I"5Gz999 A A114@@@@A A A A A A A A A A A A A A A	A 	A 	A 	A 	A 	A 	A 	A 	A 	A 	A 	A 	A 	A 	AA A A A A A A A A A A A A A A A A AsX   B,A<A%A<%A))A<,A)-A<0B<B 	 BB 	BBB)__doc__	functoolsr   unittest.mockr   numpyr   r`   scipy.sparsesparser9   "sklearn.datasets.tests.test_commonr   r   r   sklearn.preprocessingr	   sklearn.utils._testingr
   r,   r0   rA   rK   rg   rj   rs   rQ   r-   r+   <module>r}      s3                                   
 , + + + + + ? ? ? ? ? ?( ( (@	9 	9 	9? ? ?<D D D1 1 12G G G	A 	A 	A 	A 	Ar-   