
    0Ph4              	          d Z ddlZddlmZmZ ddlZddlZddlm	Z	 ddl
mZmZmZ ddlmZ ddlmZ ddlmZ dd	lmZmZ dd
lmZ ddlmZmZmZmZ ddlmZm Z   e            Z! e            Z"d Z#ej$        %                    dee z             d             Z&d Z'd Z(d Z)d Z*d Z+ej$        %                    dddg          d             Z,d Z-d Z.d Z/d Z0d Z1 ed ed1i dd i!          ej$        %                    d"d#d$g          d%                         Z2 ed ed1i dd&i!          ej$        %                    d"d#d$g          d'                         Z3d( Z4ej$        %                    d)e          d*             Z5d+ Z6ej$        %                    dee z             d,             Z7ej$        %                    d-d.d/g          ej$        %                    dddg          d0                         Z8dS )2zD
Testing for Isolation Forest algorithm (sklearn.ensemble.iforest).
    N)Mockpatch)parallel_backend)load_diabetes	load_irismake_classification)IsolationForest)_average_path_length)roc_auc_score)ParameterGridtrain_test_split)check_random_state)assert_allcloseassert_array_almost_equalassert_array_equalignore_warnings)CSC_CONTAINERSCSR_CONTAINERSc                 `   t          j        ddgddgg          }t          j        ddgddgg          }t          dgg dddgd          }t                      5  |D ]6}t	          dd	| i|                    |                              |           7	 d
d
d
           d
S # 1 swxY w Y   d
S )z6Check Isolation Forest for various parameter settings.r            )      ?      ?r   TF)n_estimatorsmax_samples	bootstraprandom_stateN )nparrayr   r   r	   fitpredict)global_random_seedX_trainX_testgridparamss        c/var/www/html/test/jupyter/venv/lib/python3.11/site-packages/sklearn/ensemble/tests/test_iforest.pytest_iforestr*   "   s    hAA'((GX1v1v&''F]]]$PUWW D 
		   	 	FFF);FvFFJJ gfoooo	                 s   :B##B'*B'sparse_containerc                    t          |           }t          t          j        dd         |          \  }}t	          ddgddgd          } ||          } ||          }|D ]}t          dd	| d
|                    |          }	|	                    |          }
t          dd	| d
|                    |          }|                    |          }t          |
|           dS )z=Check IForest for various parameter settings on sparse input.N2   r   r   r   TF)r   r   
   )r   r   r   )	r   r   diabetesdatar   r	   r"   r#   r   )r$   r+   rngr%   r&   r'   X_train_sparseX_test_sparser(   sparse_classifiersparse_resultsdense_classifierdense_resultss                r)   test_iforest_sparser9   2   s4    /
0
0C&x}SbS'9LLLOGV#s4-PPQQD%%g..N$$V,,M : :+ 
*<
 
@F
 

#n

 	 +22=AA + 
*<
 
@F
 

#g,, 	 )0088>=9999: :    c                  v   t           j        } d}t          j        t          |          5  t          d                              |            ddd           n# 1 swxY w Y   t          j                    5  t          j	        dt                     t          d                              |            ddd           n# 1 swxY w Y   t          j                    5  t          j	        dt                     t          t          j        d                                        |            ddd           n# 1 swxY w Y   t          j        t                    5  t                                          |                               | ddd	df                    ddd           dS # 1 swxY w Y   dS )
z7Test that it gives proper exception on deficient input.3max_samples will be set to n_samples for estimationmatch  r   Nerrorautor   r   )irisr1   pytestwarnsUserWarningr	   r"   warningscatch_warningssimplefilterr    int64raises
ValueErrorr#   )Xwarn_msgs     r)   test_iforest_errorrO   L   s~   	A
 EH	k	2	2	2 1 1D)))--a0001 1 1 1 1 1 1 1 1 1 1 1 1 1 1		 	"	" 3 3g{333F+++//2223 3 3 3 3 3 3 3 3 3 3 3 3 3 3 
	 	"	" 8 8g{333BHQKK00044Q7778 8 8 8 8 8 8 8 8 8 8 8 8 8 8
 
z	"	" 3 3a  ((111abb52223 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3sI   $AA!A8>CC	C AD<<E E  AF..F25F2c            
         t           j        } t                                          |           }|j        D ]K}|j        t          t          j        t          j	        | j
        d                                       k    sJ LdS )zDCheck max_depth recalculation when max_samples is reset to n_samplesr   N)rC   r1   r	   r"   estimators_	max_depthintr    ceillog2shape)rM   clfests      r)   test_recalculate_max_depthrY   b   sy    	A





"
"C B B}BGBGAGAJ,?,?$@$@ A AAAAAAB Br:   c                     t           j        } t                                          |           }|j        | j        d         k    sJ t          d          }d}t          j        t          |          5  |                    |            d d d            n# 1 swxY w Y   |j        | j        d         k    sJ t          d                              |           }|j        d| j        d         z  k    sJ d S )Nr   i  r@   r<   r=   g?)	rC   r1   r	   r"   max_samples_rV   rD   rE   rF   )rM   rW   rN   s      r)   test_max_samples_attributer\   j   s'   	A





"
"Cqwqz))))
c
*
*
*CDH	k	2	2	2  


              qwqz))))
c
*
*
*
.
.q
1
1CsQWQZ///////s   3BBBc                    t          |           }t          t          j        |          \  }}t	          d|                               |          }|                    d           |                    |          }|                    d           |                    |          }t          ||           t	          d|                               |          }|                    |          }t          ||           dS )zCheck parallel regression.r.   r   )n_jobsr   r   r^   r   N)	r   r   r0   r1   r	   r"   
set_paramsr#   r   )r$   r2   r%   r&   ensembley1y2y3s           r)    test_iforest_parallel_regressionre   y   s    
/
0
0C&x}3GGGOGVa6HIIIMMgVVHq!!!			&	!	!Bq!!!			&	!	!Bb"%%%a6HIIIMMgVVH			&	!	!Bb"%%%%%r:   c                    t          |           }d|                    dd          z  }|                    t          j        |dz   |dz
  f                    }|dd         }|                    ddd	          }t          j        |dd         |f          }t          j        d
gdz  dgdz  z             }t          d|                              |          }|	                    |           }t          ||          dk    sJ dS )z#Test Isolation Forest performs wellg333333?iX  r   Nr?   r   )   r   )lowhighsizer   rh   d   )r   r   g\(\?)r   randnpermutationr    vstackuniformr!   r	   r"   decision_functionr   )	r$   r2   rM   r%   
X_outliersr&   y_testrW   y_preds	            r)   test_iforest_performanceru      s    /
0
0CciiQA	1q5!a%.1122AhG !(;;JY$%%*-..FXqcCi1#)+,,F c
<
<
<
@
@
I
IC ##F+++F ((4//////r:   contamination      ?rB   c           	         ddgddgddgddgddgddgddgddgg}t          || 	          }|                    |           |                    |           }|                    |          }t	          j        |dd                    t	          j        |d d                   k    sJ t          |d
dgz  ddgz  z              d S )Nrg   r   r         	   r   rv      )r	   r"   rq   r#   r    minmaxr   )rv   r$   rM   rW   decision_funcpreds         r)   test_iforest_worksr      s     bB8b"X1v1v1v1vAwOA '9
W
W
WCGGAJJJ**1---M;;q>>D6-$%%}SbS/A(B(BBBBBtQ!WqB4x/00000r:   c                      t           j        } t                                          |           }|j        |j        k    sJ d S N)rC   r1   r	   r"   r[   _max_samples)rM   rW   s     r)   test_max_samples_consistencyr      s?    	A





"
"Cs///////r:   c                     t          d          } t          t          j        d d         t          j        d d         |           \  }}}}t          d          }|                    ||           |                    |           d S )Nr   r-   r.   g?)max_features)r   r   r0   r1   targetr	   r"   r#   )r2   r%   r&   y_trainrs   rW   s         r)    test_iforest_subsampled_featuresr      s    
Q

C'7crcHOCRC0s( ( ($GVWf s
+
+
+CGGGWKKr:   c                     dt          j        d          t           j        z   z  dz
  } dt          j        d          t           j        z   z  dz
  }t          t	          dg          dg           t          t	          dg          dg           t          t	          d	g          d
g           t          t	          dg          | g           t          t	          dg          |g           t          t	          t          j        g d                    dd
| |g           t	          t          j        d                    }t          |t          j        |                     d S )N       @g      @g?g     0@g}?r   g        r   r   r        )r   r   r   r   )	r    logeuler_gammar   r
   r!   aranger   sort)
result_one
result_twoavg_path_lengths      r)    test_iforest_average_path_lengthr      s@    sbn45GJu67:MMJ(!--u555(!--u555(!--u555(!--
|<<<(#//*>>>RXnnn5566	c:z*  
 +29Q<<88O(@(@AAAAAr:   c                  ,   ddgddgddgg} t          d                              |           }t                                          |           }t          |                    ddgg          |                    ddgg          |j        z              t          |                    ddgg          |                    ddgg          |j        z              t          |                    ddgg          |                    ddgg                     d S )Nr   r   皙?)rv   r   )r	   r"   r   score_samplesrq   offset_)r%   clf1clf2s      r)   test_score_samplesr      s,   1v1v1v&G---11'::D  ))DS#J<((c
|,,t|;   S#J<((c
|,,t|;   S#J<(($*<*<sCj\*J*J    r:   c                  f   t          d          } |                     dd          }t          dd| d          }|                    |           |j        d         }|                    d           |                    |           t          |j                  dk    sJ |j        d         |u sJ dS )	z/Test iterative addition of iTrees to an iForestr      r   r/   T)r   r   r   
warm_start)r   N)r   rm   r	   r"   rQ   r`   len)r2   rM   rW   tree_1s       r)   test_iforest_warm_startr      s     Q

C		"aA Rcd  C GGAJJJ_QFNNN###GGAJJJs2%%%%?1''''''r:   z*sklearn.ensemble._iforest.get_chunk_n_rowsreturn_valuer   )side_effectzcontamination, n_predict_calls)rw   r   )rB   r   c                 @    t          ||           | j        |k    sJ d S r   r   
call_countmocked_get_chunkrv   n_predict_callsr$   s       r)   test_iforest_chunks_works1r     /     }&8999&/999999r:   r/   c                 @    t          ||           | j        |k    sJ d S r   r   r   s       r)   test_iforest_chunks_works2r     r   r:   c                     t          j        d          } t                      }|                    |            t           j                            d          }t          |                    |           dk              sJ t          |                    |                    dd                    dk              sJ t          |                    | dz             dk              sJ t          |                    | dz
            dk              sJ t          j	        |                    dd          dd          } t                      }|                    |            t          |                    |           dk              sJ t          |                    |                    dd                    dk              sJ t          |                    t          j        d                    dk              sJ |                    dd          } t                      }|                    |            t          |                    |           dk              sJ t          |                    |                    dd                    dk              sJ t          |                    t          j        d                    dk              sJ dS )z=Test whether iforest predicts inliers when using uniform data)rl   r/   r   r   rl   r/   N)
r    onesr	   r"   randomRandomStateallr#   rm   repeat)rM   iforestr2   s      r)   test_iforest_with_uniform_datar     sl    		AGKKNNN
)


"
"Cwq!!Q&'''''wsyyb1122a788888wq1u%%*+++++wq1u%%*+++++ 		#))Ar""C++AGKKNNNwq!!Q&'''''wsyyb1122a788888wrwy1122a788888 			!RAGKKNNNwq!!Q&'''''wsyyb1122a788888wrwy1122a78888888r:   csc_containerc                     t          ddd          \  }} | |          }t          ddd                              |           d	S )
zdCheck that Isolation Forest does not segfault with n_jobs=2

    Non-regression test for #23252
    iL rl   r   	n_samples
n_featuresr   r/      r   )r   r   r^   N)r   r	   r"   )r   rM   _s      r)   *test_iforest_with_n_jobs_does_not_segfaultr   =  sT     CaPPPDAqaAQ???CCAFFFFFr:   c                     t          j        d          } t          j                            d          }|                     |                    d          dg          }t          dd          }t          j	                    5  t          j
        dt                     |                    |           d	d	d	           d	S # 1 swxY w Y   d	S )
zCheck that feature names are preserved when contamination is not "auto".

    Feature names are required for consistency checks during scoring.

    Non-regression test for Issue #25844
    pandasr   r{   a)r1   columnsg?r~   rA   N)rD   importorskipr    r   r   	DataFramerm   r	   rG   rH   rI   rF   r"   )pdr2   rM   models       r)   #test_iforest_preserve_feature_namesr   H  s     
	X	&	&B
)


"
"C
#))A,,66A$???E		 	"	"  g{333		!                 s   0C  CCc                 b   t          ddd          \  }} | |          }|                                 d}t          d|d                              |          }|                    |          }|dk                                     |j        d         z  t          j        |          k    sJ dS )	zCheck that `IsolationForest` accepts sparse matrix input and float value for
    contamination.

    Non-regression test for:
    https://github.com/scikit-learn/scikit-learn/issues/27626
    r-   r{   r   r   r   r   )r   rv   r   N)	r   sort_indicesr	   r"   rq   sumrV   rD   approx)r+   rM   r   rv   r   
X_decisions         r)   -test_iforest_sparse_input_float_contaminationr   Z  s     JJJDAqANNMm!  	c!ff  **1--JN!!AGAJ.&-2N2NNNNNNNr:   r^   r   r   c           	      v   ddgddgddgddgddgddgddgddgg}t          | |d	
          }|                    |           |                    |           }|                    |          }t	          j        |dd	                   t	          j        |d	d                   k    sJ t          |ddgz  ddgz  z              t          | |d
          }|                    |           t          d|          5  |                    |          }d	d	d	           n# 1 swxY w Y   t          ||           d	S )z5Check that `IsolationForest.predict` is parallelized.ry   rg   r   r   rz   r{   r|   r}   N)r   rv   r^   r   	threadingr_   )	r	   r"   rq   r#   r    r   r   r   r   )	r$   rv   r^   rM   rW   r   r   clf_parallelpred_paralells	            r)   test_iforest_predict_parallelr   n  s   
 bB8b"X1v1v1v1vAwOA '}T  C GGAJJJ**1---M;;q>>D 6-$%%}SbS/A(B(BBBBBtQ!WqB4x/000"'}R  L Q	+f	5	5	5 0 0$,,Q//0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 t]+++++s   <DD"%D"r   )9__doc__rG   unittest.mockr   r   numpyr    rD   joblibr   sklearn.datasetsr   r   r   sklearn.ensembler	   sklearn.ensemble._iforestr
   sklearn.metricsr   sklearn.model_selectionr   r   sklearn.utilsr   sklearn.utils._testingr   r   r   r   sklearn.utils.fixesr   r   rC   r0   r*   markparametrizer9   rO   rY   r\   re   ru   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r:   r)   <module>r      s     % % % % % % % %      # # # # # # J J J J J J J J J J , , , , , , : : : : : : ) ) ) ) ) ) C C C C C C C C , , , , , ,            ? > > > > > > > y{{=??    +^n-LMM: : NM:23 3 3,B B B0 0 0& & &(0 0 00 4.991 1 :910 0 0  B B B(  "( ( (. 0++*++   9I{;STT: : UT	 
: 0,,+,,   9I{;STT: : UT	 
:9 9 9D .99G G :9G  $ +^n-LMMO O NMO& Aq6**4.99, , :9 +*, , ,r:   