
    J/Ph                         d dl Zd dlmZmZmZ d dlmZmZm	Z	 d dl
mZ d Zd Zd Zd Zd	 Zd
 Zd Zd Zd Zd Zd Zd Zd Zd Z G d de	          Zedk    r ej                     dS dS )    N)cudaint32float32)skip_on_cudasimunittestCUDATestCase)ENABLE_CUDASIMc                 ^    t          j        d          }t          j                     || |<   d S N   )r   gridsyncthreadsaryis     a/var/www/html/test/jupyter/venv/lib/python3.11/site-packages/numba/cuda/tests/cudapy/test_sync.pyuseless_syncthreadsr      s+    	!ACFFF    c                 ^    t          j        d          }t          j                     || |<   d S r   r   r   syncwarpr   s     r   useless_syncwarpr      s'    	!AMOOOCFFFr   c                 `    t          j        d          }t          j        d           || |<   d S )Nr     r   r   s     r   useless_syncwarp_with_maskr      s,    	!AM&CFFFr   c                 h   t           j                            dt                    }t          j        d          }|||<   t          j                     |dk     r+||         ||dz            z   ||<   t          j        d           |dk     r+||         ||dz            z   ||<   t          j        d           |dk     r+||         ||dz            z   ||<   t          j        d           |d	k     r+||         ||d	z            z   ||<   t          j        d
           |dk    r|d         |d         z   | d<   d S d S )N    r      r                     r   )r   sharedarrayr   r   r   )ressmr   s      r   coop_syncwarpr)      s/   			2u	%	%B	!ABqEMOOO2vv11r6
"1f1uu11q5	!1d1uu11q5	!1c1uu11q5	!1cAvvAAA vr   c                     d}t           j                            |t                    }t          j        d          }|dk    rt          |          D ]}|||<   t          j                     ||         | |<   d S )Nd   r   r   )r   r%   r&   r   r   ranger   )r   Nr(   r   js        r   simple_smemr/   4   st    A			1e	$	$B	!AAvvq 	 	ABqEEUCFFFr   c                     t          j        d          \  }}t           j                            dt                    }|dz   |dz   z  |||f<   t          j                     |||f         | ||f<   d S )Nr#   
      r   r   r   r%   r&   r   r   )r   r   r.   r(   s       r   coop_smem2dr5   ?   sl    9Q<<DAq			8W	-	-BA!a% Bq!tH1a4C1IIIr   c                     t          j        d          }t           j                            dt                    }|dz  ||<   t          j                     ||         | |<   d S )Nr   r   r#   r4   )r   r   r(   s      r   dyn_shared_memoryr7   G   sS    	!A			1g	&	&BEBqEUCFFFr   c                 l    | dxx         dz  cc<   t          j                     | dxx         dz  cc<   d S Nr   {   iA  )r   threadfencer   s    r   use_threadfencer=   O   s?    FFFcMFFFFFFcMFFFFFr   c                 l    | dxx         dz  cc<   t          j                     | dxx         dz  cc<   d S r9   )r   threadfence_blockr<   s    r   use_threadfence_blockr@   U   s?    FFFcMFFFFFFcMFFFFFr   c                 l    | dxx         dz  cc<   t          j                     | dxx         dz  cc<   d S r9   )r   threadfence_systemr<   s    r   use_threadfence_systemrC   [   s?    FFFcMFFFFFFcMFFFFFr   c                 h    t          j        d          }t          j        | |                   ||<   d S r   )r   r   syncthreads_countary_inary_outr   s      r   use_syncthreads_countrI   a   s+    	!A'q	22GAJJJr   c                 h    t          j        d          }t          j        | |                   ||<   d S r   )r   r   syncthreads_andrF   s      r   use_syncthreads_andrL   f   s+    	!A%fQi00GAJJJr   c                 h    t          j        d          }t          j        | |                   ||<   d S r   )r   r   syncthreads_orrF   s      r   use_syncthreads_orrO   k   s+    	!A$VAY//GAJJJr   c                 L    t           rdS t          j                    j        | k    S )NT)r	   r   get_current_devicecompute_capability)ccs    r   _safe_cc_checkrT   p   s'     Bt&((;rAAr   c                   ~   e Zd Zd Zd Z ed          d             Z ed           ej         e	d          d          d                         Z
 ed           ej         e	d          d          d                         Zd	 Zd
 Zd Zd Zd Zd Zd Zd Zd Zd Zd Zd Zd Zd Zd Zd Zd Zd ZdS )TestCudaSyncc                 *    t          j        d          |          }d}t          j        |t          j                  }t          j        |t          j                  } |d|f         |           t          j                            ||           d S )Nvoid(int32[::1])r2   dtyper   )r   jitnpemptyr   arangetestingassert_equal)selfkernelcompilednelemr   exps         r   _test_uselesszTestCudaSync._test_uselessx   s    /48.//77huBH---iRX...E3

S)))))r   c                 :    |                      t                     d S N)rf   r   ra   s    r   test_useless_syncthreadsz%TestCudaSync.test_useless_syncthreads   s    ./////r   z#syncwarp not implemented on cudasimc                 :    |                      t                     d S rh   )rf   r   ri   s    r   test_useless_syncwarpz"TestCudaSync.test_useless_syncwarp   s    +,,,,,r   )   r   z'Partial masks require CC 7.0 or greaterc                 :    |                      t                     d S rh   )rf   r   ri   s    r   test_useless_syncwarp_with_maskz,TestCudaSync.test_useless_syncwarp_with_mask   s     	566666r   c                    d}d}d} t          j        d          t                    }t          j        dt          j                  } |||f         |           t          j                            ||d                    d S )Ni  r   r   rX   rY   r   )r   r[   r)   r\   zerosr   r_   r`   )ra   expectednthreadsnblocksrc   r'   s         r   test_coop_syncwarpzTestCudaSync.test_coop_syncwarp   s     /48.//>>hq)))#("#C(((

#a&11111r   c           	      F    t          j        d          t                    }d}t          j        |t          j                  } |d|f         |           |                     t          j        |t          j        |t          j                  k                         d S )NrX   r+   rY   r   )	r   r[   r/   r\   r]   r   
assertTrueallr^   )ra   rc   rd   r   s       r   test_simple_smemzTestCudaSync.test_simple_smem   s    /48.//<<huBH---E3sbiRX&F&F&FFGGHHHHHr   c                     t          j        d          t                    }d}t          j        |t          j                  } |d|f         |           t          j        |          }t          |j        d                   D ]/}t          |j        d                   D ]}|dz   |dz   z  |||f<   0| 	                    t          j
        ||                     d S )Nzvoid(float32[:,::1])r1   rY   r   r   )r   r[   r5   r\   r]   r   
empty_liker,   shaperw   allclose)ra   rc   r|   r   re   r   r.   s          r   test_coop_smem2dzTestCudaSync.test_coop_smem2d   s    348233K@@huBJ///E3mC  sy|$$ 	. 	.A39Q<(( . .Uq1u-AqD		.C--.....r   c           
      j    t          j        d          t                    }d}t          j        |t          j                  } |d|d|j        dz  f         |           |                     t          j        |dt          j	        |j        t          j
                  z  k                         d S )Nzvoid(float32[::1])2   rY   r   r   r!   r#   )r   r[   r7   r\   r]   r   sizerw   rx   r^   r   )ra   rc   r|   r   s       r   test_dyn_shared_memoryz#TestCudaSync.test_dyn_shared_memory   s    1480112CDDhuBJ///+E1chl*+C000sa")CHBH*M*M*M&MMNNOOOOOr   c                 h   t           d d          f} t          j        |          t                    }t	          j        dt          j                   } |d         |           |                     d|d                    t          s+|                     d|	                    |                     d S d S )Nr2   rY   r   r     r   z
membar.gl;)
r   r   r[   r=   r\   rq   assertEqualr	   assertIninspect_asmra   sigrc   r   s       r   test_threadfence_codegenz%TestCudaSync.test_threadfence_codegen   s    QQQxk 48C==11hr***sCF+++ 	CMM,(<(<S(A(ABBBBB	C 	Cr   c                 h   t           d d          f} t          j        |          t                    }t	          j        dt          j                   } |d         |           |                     d|d                    t          s+|                     d|	                    |                     d S d S )Nr2   rY   r   r   r   zmembar.cta;)
r   r   r[   r@   r\   rq   r   r	   r   r   r   s       r   test_threadfence_block_codegenz+TestCudaSync.test_threadfence_block_codegen   s    QQQxk 48C==!677hr***sCF+++ 	DMM-)=)=c)B)BCCCCC	D 	Dr   c                 h   t           d d          f} t          j        |          t                    }t	          j        dt          j                   } |d         |           |                     d|d                    t          s+|                     d|	                    |                     d S d S )Nr2   rY   r   r   r   zmembar.sys;)
r   r   r[   rC   r\   rq   r   r	   r   r   r   s       r   test_threadfence_system_codegenz,TestCudaSync.test_threadfence_system_codegen   s    QQQxk 48C==!788hr***sCF+++ 	DMM-)=)=c)B)BCCCCC	D 	Dr   c                 2   t          j        t                    }t          j        d|          }t          j        dt          j                  }d|d<   d|d<    |d         ||           |                     t          j        |dk                         d S )NH   rY   r      *   )r   r   F   )	r   r[   rI   r\   onesrq   r   rw   rx   )ra   in_dtyperc   rG   rH   s        r   _test_syncthreads_countz$TestCudaSync._test_syncthreads_count   s    81228,,,(2RX...r
r
(((w"}--.....r   c                 D    |                      t          j                   d S rh   )r   r\   r   ri   s    r   test_syncthreads_countz#TestCudaSync.test_syncthreads_count       $$RX.....r   c                 D    |                      t          j                   d S rh   )r   r\   int16ri   s    r   test_syncthreads_count_upcastz*TestCudaSync.test_syncthreads_count_upcast   r   r   c                 D    |                      t          j                   d S rh   )r   r\   int64ri   s    r   test_syncthreads_count_downcastz,TestCudaSync.test_syncthreads_count_downcast   r   r   c                    t          j        t                    }d}t          j        ||          }t          j        |t          j                  } |d|f         ||           |                     t          j        |dk                         d|d<    |d|f         ||           |                     t          j        |dk                         d S Nr+   rY   r   r   r   )	r   r[   rL   r\   r   rq   r   rw   rx   ra   r   rc   rd   rG   rH   s         r   _test_syncthreads_andz"TestCudaSync._test_syncthreads_and   s    8/00h///(5111E67+++w!|,,---r
E67+++w!|,,-----r   c                 D    |                      t          j                   d S rh   )r   r\   r   ri   s    r   test_syncthreads_andz!TestCudaSync.test_syncthreads_and       ""28,,,,,r   c                 D    |                      t          j                   d S rh   )r   r\   r   ri   s    r   test_syncthreads_and_upcastz(TestCudaSync.test_syncthreads_and_upcast   r   r   c                 D    |                      t          j                   d S rh   )r   r\   r   ri   s    r   test_syncthreads_and_downcastz*TestCudaSync.test_syncthreads_and_downcast   r   r   c                    t          j        t                    }d}t          j        ||          }t          j        |t          j                  } |d|f         ||           |                     t          j        |dk                         d|d<    |d|f         ||           |                     t          j        |dk                         d S r   )r   r[   rO   r\   rq   r   rw   rx   r   s         r   _test_syncthreads_orz!TestCudaSync._test_syncthreads_or   s    8.//%x000(5111E67+++w!|,,---r
E67+++w!|,,-----r   c                 D    |                      t          j                   d S rh   )r   r\   r   ri   s    r   test_syncthreads_orz TestCudaSync.test_syncthreads_or      !!"(+++++r   c                 D    |                      t          j                   d S rh   )r   r\   r   ri   s    r   test_syncthreads_or_upcastz'TestCudaSync.test_syncthreads_or_upcast  r   r   c                 D    |                      t          j                   d S rh   )r   r\   r   ri   s    r   test_syncthreads_or_downcastz)TestCudaSync.test_syncthreads_or_downcast
  r   r   N)__name__
__module____qualname__rf   rj   r   rl   r   
skipUnlessrT   ro   ru   ry   r~   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r    r   r   rV   rV   w   s"       * * *0 0 0 _:;;- - <;- _:;;X//BD D7 7D D <;7 _:;;X//BD D
2 
2D D <;
2I I I	/ 	/ 	/P P PC C CD D DD D D/ / // / // / // / /	. 	. 	.- - -- - -- - -	. 	. 	., , ,, , ,, , , , ,r   rV   __main__)numpyr\   numbar   r   r   numba.cuda.testingr   r   r   numba.core.configr	   r   r   r   r)   r/   r5   r7   r=   r@   rC   rI   rL   rO   rT   rV   r   mainr   r   r   <module>r      s       & & & & & & & & & & F F F F F F F F F F , , , , , ,        6            3 3 3
1 1 1
0 0 0
B B BT, T, T, T, T,< T, T, T,n zHMOOOOO r   