
    0Phql              	          d dl Z d dlZd dlZd dlmZmZ d dlmZ d dl	m
Z
 d dlmZmZmZmZmZ d dlmZ d dlmZmZmZmZ d Zej                            d	 ej        g d
ej                  gdf ej        ddej        gej                  gdf ej        g de          gdfdg          ej                            dddg          ej                            dddg          d                                     Z ej                            d ej        g d
ej                  gdf ej        g de          gddgfg          ej                            d ej        g d           ej        g d          g          ej                            dddg          d                                     Z!ej                            d  ej        d gd!z  d"gd!z  z   d#gz   gej                  j"        g d
gf ej        d$gd!z  d%gd!z  z   d&gz   ge          j"        g d'gfg          ej                            dd(dg          d)                         Z#ej                            d*g d+d,f ej        g d-g dg          j"        d.fg          d/             Z$d0 Z%ej                            d1d"d2gd!z  d3d4gfg dd5z  d"d2gz   g d6fg d7d5z  d8d9gz   g d:fg          d;             Z&ej                            d<d=d>g          ej                            dddg          ej                            dg d?          d@                                     Z'ej                            dA ej        dBgdCz            dBf ej        d gdCz            d f ej        dDgdCz  e          d fgg dEF          ej                            dg dG          dH                         Z(dI Z)dJ Z*ej                            dg dK          dL             Z+ej                            ddMdg          dN             Z,dO Z-dS )P    N)assert_allcloseassert_array_equal)RandomForestRegressor)Ridge)KFoldShuffleSplitStratifiedKFoldcross_val_scoretrain_test_split)make_pipeline)KBinsDiscretizerLabelBinarizerLabelEncoderTargetEncoderc                 <   t          j        |t           j                  }t          j        |          }|dk    rt          j        |          }t          |          D ]k}|| |k             }|j        d         }	|	dk    r|||<   't          j        |          }
|
|z  }|	|	|z   z  }|t          j        |          z  d|z
  |z  z   ||<   l|S t          |          D ]@}|| |k             }t          j        |          ||z  z   }|j        d         |z   }||z  ||<   A|S )z0Simple Python implementation of target encoding.dtypeautor      )npzerosfloat64meanvarrangeshapesum)	X_ordinal	y_numericn_categoriessmoothcur_encodingsy_mean
y_variancecy_subsetn_iy_subset_variancemlambda_current_sumcurrent_cnts                  o/var/www/html/test/jupyter/venv/lib/python3.11/site-packages/sklearn/preprocessing/tests/test_target_encoder.py_encode_targetr.      sG   H\<<<MWYFVI&&
|$$ 	T 	TA a0H.#Caxx#)a  "x 0 0!J.AS1WoG&):)::a'kV=SSM!|$$ 	9 	9A a0H&**Vf_<K".+f4K*[8M!    zcategories, unknown_valuer   r      r            ?      @      @)catdogsnakebear)r      r!         @r   target_typebinary
continuousc                    d}t          j        dgdz  dgdz  z   dgdz  z   gt           j                  j        }t          j        g d	gt           j                  j        }|j        d         }| d
k    r|}	|}
n| d         |         }	| d         |         }
t          j        |
|ggf          }
t           j                            |          }d}|dk    r>|                    dd|          }t          j        ddgt                    }||         }n"|dk    sJ |
                    dd|          }|}|                    |          }||         }|	|         }	||         }||         }|dk    rt          ||d          }nt          ||d          }t          j        |t           j                  }|                    ||          D ]>\  }}||df         ||         }}t#          ||||          }|||df                  ||df<   ?t%          || ||          }|                    |	|          }|j        |k    sJ t+          ||           t-          |j                  dk    sJ |dk    rt1          |j        |           n	|j        J t          j        |          }t#          |dddf         |||          }t+          |j        d         |           |j        t9          j        |          k    sJ t          j        |t          j        |g          f                              dd          }|                    |
          }t+          ||           dS )zCheck encoding for binary and continuous targets.

    Compare the values returned by `TargetEncoder.fit_transform` against the
    expected encodings for cv splits from a naive reference Python
    implementation in _encode_target.
    r:   r      r      r1   (   r   r0   r   r=   lowhighsizer6   r7   r>   Tn_splitsrandom_stateshuffle)r!   
categoriescvrJ   N) r   arrayint64Tr   concatenaterandomRandomStaterandintobjectuniformpermutationr	   r   
empty_liker   splitr.   r   fit_transformtarget_type_r   len
encodings_r   classes_r   target_mean_pytestapproxreshape	transform)rL   unknown_valueglobal_random_seedr!   r<   r    X_train_int_arrayX_test_int_array	n_samplesX_trainX_testdata_rngrI   r   target_namesy_trainshuffled_idxrM   expected_X_fit_transform	train_idxtest_idxX_y_r"   target_encoderX_fit_transformr#   expected_encodingsexpected_X_test_transformX_test_transforms                                 r-   test_encodingrz   7   s   & L1#(aS2X"5b"@!ARRRTx28<<<>!'*IV#!Q- 12A/0^V}o%6788Fy$$%788HHh$$$CC	xf===y) l****$$2I$FF	''	22L),7l#Gl#G,'I h,>
 
 
 H3EtTTT  "}->bjQQQ!xx(97CC 
 
	8"9a<0)I2FB&r2|VDD0=hk*1
 1-- #'	  N %227GDDO&+5555O%=>>>~())Q....h>2LAAAA&... WYF'!!!Q$L&  N-a02DEEE&&-*?*????? !#	RXvh//0! !gb!nn  &//77$&?@@@@@r/   zcategories, unknown_valuesrabbittarget_labels)r   r1   r:   )abr%   c           
      	   t           j                            |           }d}d}t          j        |                    dd|                    }t          j        |                    dd|                    }	|d         |         }
|d         |	         }t          j        |
|f          }t          j        ||	f          }ddgg dg}d}t          j        |                    d||                    }||         }t                                          |          }d}t          || d	          }t          j	        |j
        d         |j
        d         |z  ft           j        
          }t          |          D ]\  }}t          |          D ]x}|                    ||          D ]_\  }}|dd|f         }|||f         ||         }}t          ||t!          |          |          }|||z  z   }||||f                  |||f<   `yt#          |||           } |                     ||          }!| j        dk    sJ t'          |!|           g }"t          |          D ]a\  }}t          |          D ]L}|dd|f         }t          |dd|f         |t!          |          |          }|"                    |           Mbt!          | j                  ||z  k    sJ t          ||z            D ]#}#t'          | j        |#         |"|#                    $t-          | j        |           t          j        ddgddgddgg          }$|dk    r|$}%nyt          j        |$ddddf         t2          
          }%t          |$j
        d                   D ]!}&|d         |$dd|&f                  |%dd|&f<   "t          j        |%|f          }%t          j        |d          }'t          j	        |$j
        d         |$j
        d         |z  ft           j        
          }(|$j
        d         })g d}t          |)dz
            D ]2}*t          |"          D ] \  }#}+|+|$|*||#         f                  |(|*|#f<   !3g d},t          ||z            D ]}#|'|,|#                  |(|)dz
  |#f<   |                     |%          }-t'          |-|(           dS )z&Check encoding for multiclass targets.P   r1   r   rC   r:   r   r0   TrH   r   Nr!   rM   rJ   
multiclassr2      r   rN   axis)r   r   r   r   r   r   )r   r   r1   r   r   r1   )r   rS   rT   rO   rU   column_stackr   r[   r	   emptyr   r   	enumerater   rZ   r.   r]   r   r\   r   appendr^   r   r_   rY   rV   vstackr   rd   ).rf   rL   unknown_valuesr|   r!   rngri   
n_features
feat_1_int
feat_2_intfeat_1feat_2rj   X_train_intcategories_	n_classesy_train_intrn   y_train_encrI   rM   rp   f_idxcatsc_idxrq   rr   y_classrs   rt   current_encodingexp_idxru   rv   rw   i
X_test_intrk   
column_idxr#   rx   n_rowsrow_idxencmean_idxry   s.                                                 r-   test_encoding_multiclassr      s    )

 2
3
3CIJ#++!!)+DDEEJ#++!!)+DDEEJ]:&F]:&Fovv.//G/:z":;;Kq6999%KI(3;;199;MMNNKK(G ""0099KH	(:D
 
 
B
  "x		1	{03i?@j      !--  t9%% 	 	E')xx'A'A 
 
#	8%aaah/$Y%56	8JB#1"b#d))V#L#L   59#45>N%0?(7):;;
	 #'  N
 %227GDDO&,6666O%=>>>  -- 8 8t9%% 	8 	8E!!!!U(+G-AAAuH%wD		6    %%&67777	8 ~())Z)-CCCCC:	)** M M1!46H6KLLLL~.>>> Aq6Aq6Aq6233Jz#2#qqq&1@@@
 0 344 	O 	OJ$.qM*SbS*_2M$NF111j=!!FN344W[q)))F "		!	j.q1I=>j! ! ! a FE!$$ W W 233 	W 	WFAs47
7ERSHCT8U4V%gqj11	W "!!H9z)** G G39(1+3F!&1*a-00%//77$&?@@@@@r/   zX, categories
   r   r:   r6   r7   r8   )r7   r6   cow      @c                    t           j                            d          }|                    dd| j        d                   }t          ||d                              | |          }|                                }|                    | dd                   }|d         t          j
        |          k    sJ t          |j                  d	k    sJ |j        d         d         t          j
        |          k    sJ dS )
zHCustom categories with unknown categories that are not in training data.r   rG   r@   rC   )rL   r!   rJ   rN   N)r   r   r   )r   rS   rT   rW   r   r   fitr   rd   ra   rb   r]   r^   )XrL   r!   r   yr   r#   X_transs           r-   test_custom_categoriesr     s    $ )


"
"C"171:66A
:f1
M
M
M
Q
QRSUV
W
WC VVXXFmmAbccF##G4=FM&111111s~!####>!R FM&$9$9999999r/   zy, msg)r   r1   r   r   z'Found input variables with inconsistent)r   r1   r   z7Target type was inferred to be 'multiclass-multioutput'c                     t          j        g dg          j        }t                      }t	          j        t          |          5  |                    ||            ddd           dS # 1 swxY w Y   dS )zCheck invalidate input.)r   r   r   matchN)r   rO   rQ   r   ra   raises
ValueErrorr[   )r   msgr   r   s       r-   test_errorsr   5  s     	)))A
//C	z	-	-	-    !Q                                   s   A**A.1A.c                     t          j        g dg          j        } t          j        g d          }t          d          }t	          j        t          t          j        d                    5  |	                    | |           ddd           n# 1 swxY w Y   |j
        dk    sJ t          dd	
          }|	                    | |           |j
        d	k    sJ dS )z@Check inferred and specified `target_type` on regression target.)r   r   r   r   r   r   )r3          @r4   r   r4   r   r1   rM   zQThe least populated class in y has only 1 members, which is less than n_splits=2.r   Nr   r>   )rM   r<   )r   rO   rQ   r   ra   warnsUserWarningreescaper[   r\   )r   r   r   s      r-   test_use_regression_targetr   H  s7   
$$$%&&(A
///00A
1


C	i
 

 
 
     	!Q                              |++++
1,
7
7
7Ca|++++++s   0BBBzy, feature_namesr1   AB   )A_1A_2A_3B_1B_2B_3)y1y2y3r   r   )A_y1A_y2A_y3B_y1B_y2B_y3c                 @   t          j        d          }|                    ddgdz  ddgdz  d          }t          ddd	
          }|                    d           t          ddd	
          }|                    d           |                    ||           }|                    ||           }t          |                                |           t          |	                                |           t          |	                                |j
                   dS )z*Check TargetEncoder works with set_output.pandasr}   r~   r   r   r1   )r   r   r4   r   rM   r!   rJ   default)rd   N)ra   importorskip	DataFramer   
set_outputr[   r   to_numpyr   get_feature_names_outcolumns)r   feature_namespdX_dfenc_default
enc_pandas	X_defaultX_pandass           r-   !test_feature_names_out_set_outputr   ]  s    
	X	&	&B<<sCj2oQFRK@@AAD1SqAAAKY///!Ca@@@JH---))$22I''a00HH%%''333z7799=IIIz77998;KLLLLLr/   	to_pandasTF)binary-ints
binary-strr>   c                    t          j        ddgddgddgddgddgddgddgddggt           j                  }|dk    rJt          j        g d          }t                                          |          }t          ddd          }n|d	k    rJt          j        g d
          }t                                          |          }t          ddd          }n6t          j        g dt           j                  }|}t          ddd          }t          j        |          }g dddgg}t          j        ddgddgddggt           j                  }	| rt          j
        d          }
|
                    |dddf         t          j        ddgt                    |dddf                  d          }|
                    |	dddf         g dd          }	n|}t          j        |t           j                  }t          |          D ]g\  }}|                    ||          D ]K\  }}|||f         ||         }}t#          ||t%          |          |          }||||f                  |||f<   Lhg }t          |          D ]C\  }}t#          |dd|f         |t%          |          |          }|                    |           Dt          j        |d         d         |d         d         g||d         d         g|d         d         |ggt           j                  }t)          |dd          }|                    ||          }t+          ||           t%          |j                  dk    sJ t/          d          D ]#}t+          |j        |         ||                    $|                    |	          }t+          ||           dS )z,Check target encoder with multiple features.r   r   r1   r   r   )r}   r~   r}   r}   r~   r~   r}   r~   T)rJ   rK   r   )r:   r2   r:   r:   r:   r2   r2   r2   )r4   gffffff@g333333@g      @gffffff@g      @皙$@g333333@r0   r:   r   r   Nr6   r7   )feat0feat1)r7   r6   r8   r   )r   rO   rP   r   r[   r	   float32r   r   ra   r   r   rV   rY   r   r   rZ   r.   r]   r   r   r   r^   r   rd   )r   r!   r<   r   rn   	y_integerrM   r#   rL   rk   r   rj   rp   r   r   rq   rr   rs   rt   r   rw   rx   r   rv   r   ry   s                             r-   test_multiple_features_quickr   {  sX   
 
Q!Q!Q!Q!Q!Q!Q!QHPRPX  I l""(CCCDD NN0099	QQ===		%	%(33344 NN0099	QQ===(DDDBJWWW	11d333WYF))aV$JXFFG	

 h  F   **,,"111a45%.???	!!!Q$P 
 
 qqq!t?V?V?VWWXX  "}YbjIII ,,  t#%88Iy#A#A 	 	Ixy%/0)I2FB-b"c$iiHH8H(E/*9$Xu_55	  ,, 4 4t)aaahCIIv
 
 	!!"23333 ""1%'9!'<Q'?@'*1-."1%v.	

 j! ! ! v!!
<
<
<C''99OO%=>>>s~!####1XX B Bq)+=a+@AAAA}}V,,$&?@@@@@r/   z	y, y_meang333333@r@   r}   )r>   r=   zbinary-string)ids)r   r           c                 6   t          j        dgdz  g          j        }|j        d         }t	          d|d          }|                    ||           }t          |t          j        |gg|d                     |j        d         d         t          j
        |          k    sJ |j        t          j
        |          k    sJ t          j        dgdgg          }|                    |          }t          |t          j        |ggdd                     dS )z5Check edge case where feature and target is constant.r   r@   r   r1   r   r   N)r   rO   rQ   r   r   r[   r   repeatr^   ra   rb   r`   rd   )	r   r#   r!   r   ri   r   r   rk   X_test_transs	            r-    test_constant_target_and_featurer     s    	1#(A
I
1V!
<
<
<C1%%GGRYz91EEEFFF>!Q6=#8#88888v}V444444XsQCj!!F==((LL")fXJ"B"B"BCCCCCr/   c                    d}d}t           j                            |           }|                    |          }|                    d||                              dd          }|                                }||         }||         }t          d|           }|                    ||          }t          d	
          }|                    ||          }	t          dd|           }
t          d|           }t          |
|||                                          dk     sJ t          |
|||                                          dk     sJ t          |
|	||                                          dk    sJ d S )NrA   i  rF   r   rN   r   T)rK   rJ   F)rK   r   r@   )n_estimatorsmin_samples_leafrJ   2   )rI   rJ   r   皙?      ?)r   rS   rT   normalrU   rc   argsortr   r[   r   r   r
   r   )rf   cardinalityri   r   rn   rj   y_sorted_indicesru   X_encoded_train_shuffledX_encoded_train_no_shuffled	regressorrM   s               r-   Ftest_fit_transform_not_associated_with_y_if_ordinal_categorical_is_notr     s    KI
)

 2
3
3Cjjij((Gkk![yk99AA"aHHG ((&'G&'G"4>PQQQN-;;GWMM"5111N"0">">w"P"P &";M  I 
r0B	C	C	CB9gw2>>>CCEEKKKK	#;WLLLQQSS
	 	 	 	 		#>BOOOTTVV
	 	 	 	 	 	r/   c                  z   t          j        g dg          j        } t          j        g d          }t          ddd          }|                    | |          }t          |d         t          j        |dd	                              t          |d
         t          j        |d	d                              d	S )zECheck edge case with zero smoothing and cv does not contain category.)
r   r   r   r   r   r   r   r   r   r   )
g @g333333@g333333?g@r3   g      "@r   gffffff,@g*@g      .@r   Fr1   )r!   rK   rM   r   r   NrN   )r   rO   rQ   r   r[   r   r   )r   r   r   r   s       r-   test_smooth_zeror    s    
0001224A
GGGHHA
sEa
8
8
8C1%%G GAJ!""/// GBK2A200000r/   )r   g     @@r   c                    t           j                            |          }|                    d          }d}t	          |d                              |                    dd                    }t          |||          \  }}}}	|                    |          }
|
|	                    t           j
                           }|
|	                    t           j
                           }t          | |	          }|                    ||          }|                    |          }|                    ||          }|                    |          }t          ||           t          ||           d S )
Ni  r   rA   ordinal)n_binsencoderN   r   rJ   r!   rJ   )r   rS   rT   r   r   r[   rc   r   rX   astypeint32r   rd   r   )r!   rf   r   r   r    r   rj   rk   rn   y_testpermutated_labelsX_train_permutedX_test_permutedru   X_train_encodedX_test_encodedX_train_permuted_encodedX_test_permuted_encodeds                     r-   3test_invariance_of_encoding_under_label_permutationr  )  sa   
 )

 2
3
3C 	


ALY???MM			"a	 	A (8	1-( ( ($GVWf 55()A)AB'bh(?(?@O"&?QRRRN$227GDDO#--f55N-;;<LgVV,66GGO%=>>>N$;<<<<<r/   r   c                    t          ddd          }d}t          j                            |          }|                    |          }d|                    |          z  }d}t          |dd	|
                              ||z                       dd                    }|                    |          }	|	|	                    t          j
                           }|                    |          }
|                    t          d|z            |d                              dd          }t          j        ||
|gd          }t          ||d          \  }}}}|                    ||          }|                    ||          dk     sJ |                    ||          dk     sJ t#          t%          | |          |                              ||          }|d         j        }|                    ||          dk    s
J |            |                    ||          dk    s
J |            |d         t)          j        dd          k    sJ t          j        |dd                    dk                                     sJ t%          | |                              ||          }|                    |          }|                    |          }|                    ||          }|j        }|                    ||          dk    s
J |            |                    ||          dk     s
J |            t-          |d                   t-          |d                   k     sJ d S )Ngư>lsqrF)alphasolverfit_interceptiP  g?d   r  rW   )r  r  strategyrJ   rN   r   g?T)rF   replacer   r   r  r   r  r   g{Gz?)absg?gffffff?r1   )r   r   rS   rT   randnr   r[   rc   rX   r  r	  choiceintrR   r   r   scorer   r   coef_ra   rb   r  allrd   )r!   rf   linear_regressionri   r   r   noiser    X_informativer  
X_shuffledX_near_unique_categoriesr   rj   rk   rn   r
  	raw_modelmodel_with_cvcoefru   X_enc_no_cv_trainX_enc_no_cv_testmodel_no_cvs                           r-   *test_target_encoding_for_linear_regressionr-  M  s    DuMMM I
)

 2
3
3C		)A
 #))I&&&EL$	  
 mQY''A..//  55%m&:&:28&D&DEM //J  #zzC)O9d  *    gb!nn 
 		
$<=	 	 	A (811'M'M'M$GVWf
 "%%gw77I??7G,,s2222??66**S0000 "V#6668I 	c'7  "Dw003666666vv..444d444 7fmA40000000F48s"''))))) #&sCCCGG N '0099%//77#''(97CCK D.883>>>>>>-v66<<<d<<<
 tAw<<#d1g,,&&&&&&r/   c                  *   t          j        dd          } |                     dd          5  |                     g dg dd          }t	          d	
                              |dg         |d                    ddd           dS # 1 swxY w Y   dS )z
    Test target-encoder cython code when y is read-only.

    The numpy array underlying df["y"] is read-only when copy-on-write is enabled.
    Non-regression test for gh-27879.
    r   z2.0)
minversionzmode.copy_on_writeT)r}   r~   r~   )r   r;   r5   )xr   r>   )r<   r0  r   N)ra   r   option_contextr   r   r   )r   dfs     r-   test_pandas_copy_on_writer3    s     
	X%	8	8	8B			/	6	6 H H\\oooFFGG,///33BuIr#wGGGH H H H H H H H H H H H H H H H H Hs   ABBB).r   numpyr   ra   numpy.testingr   r   sklearn.ensembler   sklearn.linear_modelr   sklearn.model_selectionr   r   r	   r
   r   sklearn.pipeliner   sklearn.preprocessingr   r   r   r   r.   markparametrizerO   rP   nanr   rV   rz   r   rQ   r   r   r   r   r   r   r   r  r  r-  r3   r/   r-   <module>r?     s   				      = = = = = = = = 2 2 2 2 2 2 & & & & & &              + * * * * *             < 
"(999BH
-
-
-	.2
"(Cbf%RZ
8
8
8	93?
"(***&
9
9
9	:FC	  C=11<(@AA]A ]A BA 21 ]A@  
"(999BH
-
-
-	.7
"(***&
9
9
9	:VX<NO  hbhyyy))828OOO+D+DE  C=11eA eA 21  eAP  BHqcBh!r)QC/0AAACYYK	

 BH2",y89  """#		
  C=11: : 21  :  	@ABHiii+,,.E	
	 	   	 	 , , ,* 
Q"sCj!	Q!Q	!K!K!KL"dD\1<<<	

 
M M
 
M& tUm44C=11(S(S(STTOA OA UT 21 54OAd 	3%"*		s#	1#(		Q	3%"*F	+	+	+Q/
 	211    #5#5#566D D 76 D + + +\1 1 1" #5#5#566 =  = 76 =F C=11o' o' 21o'd
H 
H 
H 
H 
Hr/   