
    0Ph                         d dl Z d dlmZ d dlZd dlmZmZmZmZ ej	        
                    dg dddgfg ddgfg dg fg          d             Zd	 Zej	        
                    d
eeg          d             Zej	        
                    d
eeg          d             Zej	        
                    d
 ej        eej	        j                  eg          d             ZdS )    N)BytesIO)_liac_arff_parser_pandas_arff_parser_post_process_frameload_arff_from_gzip_filezfeature_names, target_names)col_int_as_integercol_int_as_numericcol_float_as_realcol_float_as_numericcol_categorical
col_stringc           	         t          j        d          }|                    g dg dg dg dg dg dd          }t          || |          \  }}t	          ||j                  sJ t          |          dk    rt	          ||j                  sJ dS t          |          dk    rt	          ||j                  sJ dS |J dS )	zNCheck the behaviour of the post-processing function for splitting a dataframe.pandas)         )g      ?g       @g      @)abc)r   r	   r
   r   r   r   r   r   N)pytestimportorskip	DataFramer   
isinstancelenSeries)feature_namestarget_namespd
X_originalXys         g/var/www/html/test/jupyter/venv/lib/python3.11/site-packages/sklearn/datasets/tests/test_arff_parser.pytest_post_process_framer#      s    D 
	X	&	&B"+))"+))!0$3OO.)//	
 	
	 	J z=,GGDAqa&&&&&
<A!R\*******	\		a		!RY'''''''yyyyy    c            	          d} t          j        t          |           5  t          dddddd           ddd           dS # 1 swxY w Y   dS )z3An error will be raised if the parser is not known.z8Unknown parser: 'xxx'. Should be 'liac-arff' or 'pandas')matchxxxN)r   raises
ValueErrorr   )err_msgs    r"   *test_load_arff_from_gzip_file_error_parserr+   G   s    
 IG	z	1	1	1 K K ueUEJJJK K K K K K K K K K K K K K K K K Ks   A  AAparser_funcc                    t          j        d          }t          t          j        d                              d                    }dddddddd	dd
ddd}g d}dg}d| t          u rdnd| t          u rdnddd} | |d|||          \  }}}}|j                                        ||z   k    sJ |j	        
                    |j        d         |                    |d                     dS )z9Check that we properly strip single quotes from the data.r   a5  
            @relation 'toy'
            @attribute 'cat_single_quote' {'A', 'B', 'C'}
            @attribute 'str_single_quote' string
            @attribute 'str_nested_quote' string
            @attribute 'class' numeric
            @data
            'A','some text','"expect double quotes"',0
            utf-8nominalcat_single_quote	data_typenamestringstr_single_quotestr_nested_quotenumericclass)r0   r5   r6   r8   )r0   r5   r6   A	some textz'some text'z"expect double quotes"z'"expect double quotes"'r   output_arrays_typeopenml_columns_infofeature_names_to_selecttarget_names_to_selectr3   N)r   r   r   textwrapdedentencoder   columnstolisttestingassert_series_equalilocr   	r,   r   	arff_filecolumns_infor   r   expected_values_frames	            r"   +test_pandas_arff_parser_strip_single_quotesrO   Q   s{    
	X	&	&B
	
 
	
 &// I  #&
 

 "&
 

 "&
 

 #
 
 L&  M
 9L  &*;;;KK /// %$- O ![#( -+  NAq% =!!]\%AAAAAJ""5:a="))ORS)2T2TUUUUUr$   c                    t          j        d          }t          t          j        d                              d                    }dddddddd	dd
ddd}g d}dg}ddddd} | |d|||          \  }}}}|j                                        ||z   k    sJ |j        	                    |j
        d         |                    |d                     dS )z9Check that we properly strip double quotes from the data.r   a5  
            @relation 'toy'
            @attribute 'cat_double_quote' {"A", "B", "C"}
            @attribute 'str_double_quote' string
            @attribute 'str_nested_quote' string
            @attribute 'class' numeric
            @data
            "A","some text","'expect double quotes'",0
            r.   r/   cat_double_quoter1   r4   str_double_quoter6   r7   r8   )rQ   rR   r6   r8   )rQ   rR   r6   r9   r:   z'expect double quotes'r   r;   r@   Nr   r   r   rA   rB   rC   rD   rE   rF   rG   rH   r   rI   s	            r"   +test_pandas_arff_parser_strip_double_quotesrT      sV    
	X	&	&B
	
 
	
 &// I  #&
 

 "&
 

 "&
 

 #
 
 L&  M
 9L  '4	 O ![#( -+  NAq% =!!]\%AAAAAJ""5:a="))ORS)2T2TUUUUUr$   )marksc                    t          j        d          }t          t          j        d                              d                    }dddddddd	dd
ddd}g d}dg}ddddd} | |d|||          \  }}}}|j                                        ||z   k    sJ |j        	                    |j
        d         |                    |d                     dS )z7Check that we properly parse with no quotes characters.r   a'  
            @relation 'toy'
            @attribute 'cat_without_quote' {A, B, C}
            @attribute 'str_without_quote' string
            @attribute 'str_internal_quote' string
            @attribute 'class' numeric
            @data
            A,some text,'internal' quote,0
            r.   r/   cat_without_quoter1   r4   str_without_quotestr_internal_quoter7   r8   )rW   rX   rY   r8   )rW   rX   rY   r9   r:   z'internal' quoter   r;   r@   NrS   rI   s	            r"   'test_pandas_arff_parser_strip_no_quotesrZ      sV    
	X	&	&B
	
 
	
 &// I  #'
 

 "'
 

 "(
 

 #
 
 L&  M
 9L !(0	 O ![#( -+  NAq% =!!]\%AAAAAJ""5:a="))ORS)2T2TUUUUUr$   )rA   ior   r   sklearn.datasets._arff_parserr   r   r   r   markparametrizer#   r+   rO   rT   paramxfailrZ    r$   r"   <module>rb      s                      !   -	
   	
   	
' @ A @2K K K ):<O(PQQCV CV RQCVL ):<O(PQQ<V <V RQ<V~  	&fk.?@@@ <V <V <V <V <Vr$   