
    0Ph>                         d dl Z d dlmZ d dlZd dlZd dlmZ d dlm	Z	 d dl
mZ d dlmZ d dlmZmZ d dlmZ d d	lmZmZmZmZ d d
lmZ d Zej                            de          d             Zej                            de          d             Zej                            de          d             Zej                            de          d             Z ej                            de          d             Z!d Z"ej                            dd          d             Z#d Z$d Z%eej                            de          ej                            dd          d                                     Z&eej                            de          d                         Z'd Z(ej                            dd          ej                            de          d                         Z)ej                            dd          ej                            de          d                         Z*ej                            de          d             Z+ej                            de          d             Z,ej                            de          d             Z-ej                            de          d              Z.d! Z/d" Z0ej                            d#g d$          ej                            de          d%                         Z1ej                            de          d&             Z2ej                            d'd(          d)             Z3ej                            d'd(          d*             Z4dS )+    N)StringIO)assert_array_equal)
block_diag)psi)LatentDirichletAllocation)_dirichlet_expectation_1d_dirichlet_expectation_2d)NotFittedError)assert_allcloseassert_almost_equalassert_array_almost_equal!if_safe_multiprocessing_with_blas)CSR_CONTAINERSc                 z    d}t          j        d|t                    }|g|z  }t          | } | |          }||fS )N   )r   r   )dtype)npfullintr   )csr_containern_componentsblockblocksXs        k/var/www/html/test/jupyter/venv/lib/python3.11/site-packages/sklearn/decomposition/tests/test_online_lda.py_build_sparse_arrayr      sO     LGFL444EW|#FFAaA!    r   c                     t          |           \  }}d|z  }t          |||d          }t          |d          }|                    |          }|                    |          }t          ||           d S )Ng      ?r   )r   doc_topic_priortopic_word_priorrandom_stater   r!   )r   r   fit_transformr   )r   r   r   priorlda_1lda_2topic_distr_1topic_distr_2s           r   test_lda_default_prior_paramsr)   $   s     *-88OL!,E%!	  E &<aPPPE''**M''**M}55555r   c                 x   t           j                            d          }t          |           \  }}t	          |dd|          }|                    |           g d}|j        D ]T}t          |                                dd          d d d                   }t          t          |                    |v sJ Ud S )Nr      batch)r   evaluate_everylearning_methodr!   )r   r+      )r         )         r   randomRandomStater   r   fitcomponents_setargsorttuplesortedr   rngr   r   ldacorrect_idx_grps	componenttop_idxs           r   test_lda_fit_batchrG   6   s     )


"
"C)-88OL!
#!	  C GGAJJJ888_ : :	i''))"##.ttt455VG__%%)999999: :r   c                 z   t           j                            d          }t          |           \  }}t	          |ddd|          }|                    |           g d}|j        D ]T}t          |                                dd          d d d                   }t          t          |                    |v sJ Ud S )	Nr         $@r+   online)r   learning_offsetr-   r.   r!   r/   r6   r7   r8   rA   s           r   test_lda_fit_onlinerL   J   s     )


"
"C)-88OL!
#!   C GGAJJJ888_ : :	i''))"##.ttt455VG__%%)999999: :r   c                    t           j                            d          }t          |           \  }}t	          |dd|          }t          d          D ]}|                    |           g d}|j        D ]T}t          |	                                dd          d d d                   }t          t          |                    |v sJ Ud S )	Nr   rI   d   r   rK   total_samplesr!   r   r/   r6   r7   r   r9   r:   r   r   rangepartial_fitr<   r=   r>   r?   r@   	r   rB   r   r   rC   irD   crF   s	            r   test_lda_partial_fitrW   _   s     )


"
"C)-88OL!
#!	  C 1XX  888_ : :aiikk"##&ttt,--VG__%%)999999: :r   c                    t           j                            d          }t          |           \  }}t	          |d|          }|                    |                                           g d}|j        D ]T}t          |	                                dd          d d d                   }t          t          |                    |v sJ Ud S )Nr   r,   r   r.   r!   r/   r6   r7   )r   r9   r:   r   r   r;   toarrayr<   r=   r>   r?   r@   rA   s           r   test_lda_dense_inputr[   t   s     )


"
"C)-88OL!
#!7  C GGAIIKK888_ : :	i''))"##.ttt455VG__%%)999999: :r   c                     t           j                            d          } |                     dd          }d}t	          ||           }|                    |          }|dk                                    sJ t          t          j        |d	          t          j	        |j
        d                              |                    |d
          }t          |||                    d	          d d t           j        f         z             d S )Nr   r2      
   sizer   r"   g        r+   axisF)	normalize)r   r9   r:   randintr   r#   anyr   sumonesshape	transformnewaxis)rB   r   r   rC   X_transX_trans_unnormalizeds         r   test_lda_transformrn      s    )


"
"CAH%%AL
#C
P
P
PC""GcM     bfW1555rww}Q?O7P7PQQQ==e=<<%(<(@(@a(@(H(HBJ(WW    r   method)rJ   r,   c                    t           j                            d          }|                    dd          }t	          d| |          }|                    |          }|                    |          }t          ||d           d S )Nr   r_   )2   r^   r`   r2   rY   r1   )r   r9   r:   re   r   r#   rj   r   )ro   rB   r   rC   X_fitrl   s         r   test_lda_fit_transformrs      s     )


"
"CBX&&A
#S  C a  EmmAGeWa00000r   c                      t          j        dd          } t                      }d}t          j        t
          |          5  |                    |            d d d            d S # 1 swxY w Y   d S )N)r2   r_         z^Negative values in data passedmatch)r   r   r   pytestraises
ValueErrorr;   )r   rC   regexs      r   test_lda_negative_inputr|      s    
A
#
%
%C.E	z	/	/	/  


                 s   A$$A(+A(c                  $   t           j                            d          } |                     dd          }t	                      }d}t          j        t          |          5  |                    |           d d d            d S # 1 swxY w Y   d S )Nr   r1   r]   r`   z}This LatentDirichletAllocation instance is not fitted yet. Call 'fit' with appropriate arguments before using this estimator.rv   )	r   r9   r:   re   r   rx   ry   r
   
perplexity)rB   r   rC   r{   s       r   test_lda_no_component_errorr      s    
)


"
"CAH%%A
#
%
%C	 

 
~U	3	3	3  q                 s   "BB	B	c                 z   t          |          \  }}t          j                            d          }t	          |d| d|          }|                    |           g d}|j        D ]T}t          |                                dd          d d d                   }t          t          |                    |v sJ Ud S )Nr   r0   r+   )r   n_jobsr.   r-   r!   r/   r6   r7   )r   r   r9   r:   r   r;   r<   r=   r>   r?   r@   )	ro   r   r   r   rB   rC   rD   rV   rF   s	            r   test_lda_multi_jobsr      s     *-88OL!
)


"
"C
#!  C GGAJJJ888_ : :aiikk"##&ttt,--VG__%%)999999: :r   c                    t           j                            d          }t          |           \  }}t	          |ddd|          }t          d          D ]}|                    |           g d}|j        D ]T}t          |	                                dd          d d d                   }t          t          |                    |v sJ Ud S )	Nr   r0         @   )r   r   rK   rP   r!   r/   r6   r7   rQ   rT   s	            r   test_lda_partial_fit_multi_jobsr      s     )


"
"C)-88OL!
#!  C 1XX  888_ : :aiikk"##&ttt,--VG__%%)999999: :r   c                     t           j                            d          } |                     dd          }|                     dd          }t           j                            d|df          }t	          |dd| 	          }|                    |           |                     d|d
z   |f          }t          j        t          d          5  |	                    ||           d d d            n# 1 swxY w Y   |                     d||d
z   f          }t          j        t          d          5  |	                    ||           d d d            d S # 1 swxY w Y   d S )Nr   r   r3   r_   r1   r`   r   r^   rO   r+   zNumber of samplesrv   zNumber of topics)
r   r9   r:   re   r   r;   rx   ry   rz   _perplexity_precomp_distr)rB   r   	n_samplesr   rC   invalid_n_samplesinvalid_n_componentss          r   test_lda_preplexity_mismatchr      s   
)


"
"C;;q!$$LAr""I
	!9b/22A
#!	  C GGAJJJAY]L,IJJ	z)=	>	>	> < <%%a):;;;< < < < < < < < < < < < < < < ;;q	<!;K/L;MM	z)<	=	=	= ? ?%%a)=>>>? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ?s$   C11C58C53EEEc                    t          |          \  }}t          |d| dd          }t          |d| dd          }|                    |           |                    |d          }|                    |           |                    |d          }||k    sJ |                    |d          }|                    |d          }	||	k    sJ d S )	Nr+   rN   r   r   max_iterr.   rP   r!   r_   Fsub_samplingT)r   r   r;   r~   )
ro   r   r   r   r%   r&   perp_1perp_2perp_1_subsamplingperp_2_subsamplings
             r   test_lda_perplexityr      s   
 *-88OL!%!  E &!  E 
IIaLLLae44F	IIaLLLae44FV))!$)??))!$)??!3333333r   c                 2   t          |          \  }}t          |d| dd          }t          |d| dd          }|                    |           |                    |          }|                    |           |                    |          }||k    sJ d S )Nr+   rN   r   r   r_   )r   r   r#   score)ro   r   r   r   r%   r&   score_1score_2s           r   test_lda_scorer     s    
 *-88OL!%!  E &!  E 
kk!nnG	kk!nnGgr   c                    t          |           \  }}t          |dddd          }|                    |           |                    |          }|                    |                                          }t          ||           d S )Nr+   r,   rN   r   r   )r   r   r;   r~   rZ   r   )r   r   r   rC   r   r   s         r   test_perplexity_input_formatr   :  s     *-88OL!
#!  C GGAJJJ^^AF^^AIIKK((F'''''r   c                 R   t          |           \  }}t          |dd          }|                    |           |                    |d          }|                    |          }t          j        d|t          j        |j                  z  z            }t          ||           d S )Nr_   r   )r   r   r!   Fr   ru   )
r   r   r;   r~   r   r   exprg   datar   )r   r   r   rC   perplexity_1r   perplexity_2s          r   test_lda_score_perplexityr   L  s     *-88OL!
#!BQ  C GGAJJJ>>!%>88LIIaLLE6$%"&.."89::Ll33333r   c                     t          |           \  }}t          |dddd          }|                    |           |j        }|                    |          }t          ||           d S )Nr+   r,   r   )r   r   r.   r!   r-   )r   r   r;   bound_r~   r   )r   r   r   rC   perplexity1perplexity2s         r   test_lda_fit_perplexityr   [  s{     *-88OL!
#!  C GGAJJJ *K ..##K[11111r   c                 *   t          j        d          }| | |          fD ]p}t          d                              |          }t	          |j                            d          t          j        |j        j        d                              qdS )z+Test LDA on empty document (all-zero rows).)r2   r1   i  )r   r   rb   r+   N)	r   zerosr   r;   r   r<   rg   rh   ri   )r   Zr   rC   s       r   test_lda_empty_docsr   r  s     	Aq!!" 
 
'55599!<<OQ''1Fq1I)J)J	
 	
 	
 	

 
r   c                     t          j        ddd          } t          j        |           }t          | d|           t	          |t          j        t          |           t          t          j        |                     z
            d           |                     dd          } t	          t          |           t          |           t          t          j        | d	          d
d
t           j
        f                   z
  dd           d
S )z9Test Cython version of Dirichlet expectation calculation.ir_   i'  r   gҶOɃ;)atolrN   r+   rb   Ngdy=gA:)>)rtolr   )r   logspace
empty_liker   r   r   r   rg   reshaper	   rk   )xexpectations     r   test_dirichlet_expectationr   }  s    
D"e$$A-""KaK000KARVAYY(?!@!@uMMMM			#sA!!$$ARVAA&&&qqq"*}5666	     r   c                    t          |          \  }}t          |dd| |d          }t                      }t          j        |c}	t          _        	 |                    |           |	t          _        n# |	t          _        w xY w|                                                    d          }
|                                                    d          }||
k    sJ ||k    sJ d S )Nr   r,   r   )r   r   r.   verboser-   r!   
r~   )r   r   r   sysstdoutr;   getvaluecount)r   r-   expected_linesexpected_perplexitiesr   r   r   rC   outold_outn_linesn_perplexitys               r   check_verbosityr     s     *-88OL!
#!%  C **C*cGSZ




W
llnn""4((G<<>>''55LW$$$$ L000000s   A2 2B z;verbose,evaluate_every,expected_lines,expected_perplexities))Fr+   r   r   )Fr   r   r   )Tr   r   r   )Tr+   r   r   )Tr0   r   r+   c                 ,    t          | ||||           d S )N)r   )r   r-   r   r   r   s        r   test_verbosityr     s,     1F    r   c                     t          |           \  }}t          |                              |          }|                                }t	          d t          |          D             |           dS )z6Check feature names out for LatentDirichletAllocation.)r   c                     g | ]}d | S )latentdirichletallocation ).0rU   s     r   
<listcomp>z.test_lda_feature_names_out.<locals>.<listcomp>  s!    FFFQ	(Q	(	(FFFr   N)r   r   r;   get_feature_names_outr   rR   )r   r   r   rC   namess        r   test_lda_feature_names_outr     s{     *-88OL!
#
>
>
>
B
B1
E
EC%%''EFF%2E2EFFF    r   r.   )r,   rJ   c                 0   t           j                            d          }|                    d                              |d          }t          dd|           }|                    |           |j        j        |k    sJ |j	        j        |k    sJ dS )	z2Check data type preservation of fitted attributes.r   r]   r`   F)copyr2   r   r!   r.   N)
r   r9   r:   uniformastyper   r;   r<   r   exp_dirichlet_component_)r.   global_dtyperB   r   rC   s        r   test_lda_dtype_matchr     s     )


"
"C"")),U)CCA
#Q  C GGAJJJ? L0000'-======r   c                    t           j                            |          }|                    d          }|                    t           j                  }t          d||                               |          }t          d||                               |          }t          |j	        |j	                   t          |
                    |          |
                    |                     dS )z>Check numerical consistency between np.float32 and np.float64.r]   r`   r2   r   N)r   r9   r:   r   r   float32r   r;   r   r<   rj   )r.   global_random_seedrB   X64X32lda_64lda_32s          r   test_lda_numerical_consistencyr     s     )

 2
3
3C
++8+
$
$C
**RZ
 
 C&%7  	c#hh  '%7  	c#hh  F&(:;;;F$$S))6+;+;C+@+@AAAAAr   )5r   ior   numpyr   rx   numpy.testingr   scipy.linalgr   scipy.specialr   sklearn.decompositionr   &sklearn.decomposition._online_lda_fastr   r	   sklearn.exceptionsr
   sklearn.utils._testingr   r   r   r   sklearn.utils.fixesr   r   markparametrizer)   rG   rL   rW   r[   rn   rs   r|   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   <module>r      sg   



            , , , , , , # # # # # #       ; ; ; ; ; ;        . - - - - -            / . . . . .   .996 6 :96" .99: : :9:& .99: : :9:( .99: : :9:( .99: : :9:   " #677
1 
1 87
1     #.99#677: : 87 :9 #":& #.99: : :9 #":(? ? ?. #677.994 4 :9 874< #677.99  :9 874 .99( ( :9(" .994 4 :94 .992 2 :92, .99
 
 :9
   1 1 12 A  	 	 .99  :9	 	 .99  :9 *,?@@
> 
> A@
> *,?@@B B A@B B Br   