
    J/Phs                        d dl Z d dlZd dlZd dlZd dlmZmZ d dlm	Z	 d dl
mZ d dlmZ d dlmZmZmZmZ d Zd Zd	 Zd
 Zd Zd Zd Zd Zd Zd Zd Zd Zd Zd Z d Z!d Z"d Z#d Z$d Z%d Z&d Z'd Z(d Z)d Z*d Z+d  Z,d! Z-d" Z.d# Z/d$ Z0 ej1        d%&          d'             Z2 ej1        d%&          d(             Z3d) Z4d* Z5d+ Z6d, Z7d- Z8d. Z9d/ Z:d0 Z;d1 Z<d2 Z=d3 Z>d4 Z?d5 Z@d6 ZAd7 ZBd8 ZCd9 ZDd: ZEd; ZFd< ZGd= ZHd> ZId? ZJd@ ZKdA ZLdB ZMdC ZNdD ZOdE ZPdF ZQdG ZRdH ZSdI ZTdJ ZU G dK dLe          ZVeWdMk    r ejX                     dS dS )N    N)cudaint64)compile_ptx)TypingError)f2)unittestCUDATestCaseskip_on_cudasimskip_unless_cc_53c                 2    t           j        j        }|| d<   d S Nr   r   	threadIdxxaryis     g/var/www/html/test/jupyter/venv/lib/python3.11/site-packages/numba/cuda/tests/cudapy/test_intrinsics.pysimple_threadidxr          ACFFF    c                 2    t           j        j        }|| |<   d S Nr   r   s     r   fill_threadidxr      r   r   c                     t           j        j        }t           j        j        }t           j        j        }|dz   |dz   z  |dz   z  | |||f<   d S N   )r   r   r   yz)r   r   jks       r   fill3d_threadidxr"      sI    AAAEa!e$A.C1aLLLr   c                 8    t          j        d          }|| |<   d S r   r   gridr   s     r   simple_grid1dr&      s    	!ACFFFr   c                 H    t          j        d          \  }}||z   | ||f<   d S N   r$   )r   r   r    s      r   simple_grid2dr*   $   s(    9Q<<DAqAC1IIIr   c                 p    t          j        d          }t          j        d          }|dk    r|| d<   d S d S Nr   r   r   r%   gridsize)r   r   r   s      r   simple_gridsize1dr/   )   s;    	!AaAAvvA vr   c                     t          j        d          \  }}t          j        d          \  }}|dk    r|dk    r|| d<   || d<   d S d S d S )Nr)   r   r   r-   )r   r   r    r   r   s        r   simple_gridsize2dr1   0   sX    9Q<<DAq=DAqAvv!q&&AA v&&r   c                 8   t          j        d          \  }}t           j        j        t           j        j        z  }t           j        j        t           j        j        z  }| j        \  }}t          |||          D ] }t          |||          D ]}||z   | ||f<   !d S r(   )r   r%   gridDimr   blockDimr   shaperange)	cstartXstartYgridXgridYheightwidthr   r   s	            r   intrinsic_forloop_stepr>   8   s    Yq\\NFFLNT]_,ELNT]_,EGMFE65%((  vvu-- 	 	A!eAadGG	 r   c                 4    t          j        |          | d<   d S r   )r   popcr   r7   s     r   simple_popcrB   C       Yq\\CFFFr   c                 8    t          j        |||          | d<   d S r   )r   fmar   abr7   s       r   
simple_fmarI   G   s    XaACFFFr   c                 d    t           j                            |d         |d                   | d<   d S r   r   fp16haddr   rG   rH   s      r   simple_haddrO   K   '    Y^^AaD!A$''CFFFr   c                 L    t           j                            ||          | d<   d S r   rK   rN   s      r   simple_hadd_scalarrR   O       Y^^Aq!!CFFFr   c                 r    t           j                            |d         |d         |d                   | d<   d S r   r   rL   hfmarF   s       r   simple_hfmarW   S   s-    Y^^AaD!A$!--CFFFr   c                 N    t           j                            |||          | d<   d S r   rU   rF   s       r   simple_hfma_scalarrY   W   s!    Y^^Aq!$$CFFFr   c                 d    t           j                            |d         |d                   | d<   d S r   r   rL   hsubrN   s      r   simple_hsubr]   [   rP   r   c                 L    t           j                            ||          | d<   d S r   r[   rN   s      r   simple_hsub_scalarr_   _   rS   r   c                 d    t           j                            |d         |d                   | d<   d S r   r   rL   hmulrN   s      r   simple_hmulrc   c   rP   r   c                 L    t           j                            ||          | d<   d S r   ra   rN   s      r   simple_hmul_scalarre   g   rS   r   c                 L    t           j                            ||          | d<   d S r   )r   rL   hdivrN   s      r   simple_hdiv_scalarrh   k   rS   r   c                     t          j        d          }|| j        k     r5||         }||         }t           j                            ||          | |<   d S d S r   )r   r%   sizerL   rg   )r   array_aarray_br   rG   rH   s         r   simple_hdiv_kernelrm   o   sP    	!A38||AJAJ1%%A |r   c                 V    t           j                            |d                   | d<   d S r   r   rL   hnegr   rG   s     r   simple_hnegrr   w   !    Y^^AaD!!CFFFr   c                 J    t           j                            |          | d<   d S r   ro   rq   s     r   simple_hneg_scalarru   {       Y^^ACFFFr   c                 V    t           j                            |d                   | d<   d S r   r   rL   habsrq   s     r   simple_habsrz      rs   r   c                 J    t           j                            |          | d<   d S r   rx   rq   s     r   simple_habs_scalarr|      rv   r   c                 L    t           j                            ||          | d<   d S r   )r   rL   heqrN   s      r   simple_heq_scalarr          Y]]1a  CFFFr   c                 L    t           j                            ||          | d<   d S r   )r   rL   hnerN   s      r   simple_hne_scalarr      r   r   c                 L    t           j                            ||          | d<   d S r   )r   rL   hgerN   s      r   simple_hge_scalarr      r   r   c                 L    t           j                            ||          | d<   d S r   )r   rL   hgtrN   s      r   simple_hgt_scalarr      r   r   c                 L    t           j                            ||          | d<   d S r   )r   rL   hlerN   s      r   simple_hle_scalarr      r   r   c                 L    t           j                            ||          | d<   d S r   r   rL   hltrN   s      r   simple_hlt_scalarr      r   r   T)devicec                 B    t           j                            | |          S r   r   r   r   s     r   
hlt_func_1r          9==Ar   c                 B    t           j                            | |          S r   r   r   s     r   
hlt_func_2r      r   r   c                 L    t          ||          ot          ||          | d<   d S r   )r   r   rrG   rH   r7   s       r   test_multiple_hcmp_1r      s(    a0
1a 0 0AaDDDr   c                 l    t          ||          ot          j                            ||          | d<   d S r   )r   r   rL   r   r   s       r   test_multiple_hcmp_2r      .    a3	a 3 3AaDDDr   c                 l    t          ||          ot          j                            ||          | d<   d S r   )r   r   rL   r   r   s       r   test_multiple_hcmp_3r      r   r   c                     t           j                            ||          ot           j                            ||          | d<   d S r   r   r   s       r   test_multiple_hcmp_4r      4    9==A649==A#6#6AaDDDr   c                     t           j                            ||          ot           j                            ||          | d<   d S r   )r   rL   r   r   r   s       r   test_multiple_hcmp_5r      r   r   c                 L    t           j                            ||          | d<   d S r   )r   rL   hmaxrN   s      r   simple_hmax_scalarr      rS   r   c                 L    t           j                            ||          | d<   d S r   )r   rL   hminrN   s      r   simple_hmin_scalarr      rS   r   c                     t          j        d          }|t          |           k     r*t           j                            ||                   | |<   d S d S r   )r   r%   lenrL   hsinr   r   r   s      r   simple_hsinr      D    	!A3q66zzy~~ad##! zr   c                     t          j        d          }|t          |           k     r*t           j                            ||                   | |<   d S d S r   )r   r%   r   rL   hcosr   s      r   simple_hcosr      r   r   c                     t          j        d          }|t          |           k     r*t           j                            ||                   | |<   d S d S r   )r   r%   r   rL   hlogr   s      r   simple_hlogr      r   r   c                     t          j        d          }|t          |           k     r*t           j                            ||                   | |<   d S d S r   )r   r%   r   rL   hlog2r   s      r   simple_hlog2r      D    	!A3q66zzyqt$$! zr   c                     t          j        d          }|t          |           k     r*t           j                            ||                   | |<   d S d S r   )r   r%   r   rL   hlog10r   s      r   simple_hlog10r      F    	!A3q66zzy!%%! zr   c                     t          j        d          }|t          |           k     r*t           j                            ||                   | |<   d S d S r   )r   r%   r   rL   hexpr   s      r   simple_hexpr      r   r   c                     t          j        d          }|t          |           k     r*t           j                            ||                   | |<   d S d S r   )r   r%   r   rL   hexp2r   s      r   simple_hexp2r      r   r   c                     t          j        d          }|t          |           k     r*t           j                            ||                   | |<   d S d S r   )r   r%   r   rL   hsqrtr   s      r   simple_hsqrtr      r   r   c                     t          j        d          }|t          |           k     r*t           j                            ||                   | |<   d S d S r   )r   r%   r   rL   hrsqrtr   s      r   simple_hrsqrtr     sF    	!A3q66zzy!%%! zr   c                     | dz  S )Ng      ࿩ )r   dtypes     r   numpy_hrsqrtr   
  s    9r   c                     t          j        d          }|t          |           k     r*t           j                            ||                   | |<   d S d S r   )r   r%   r   rL   hceilr   s      r   simple_hceilr     r   r   c                     t          j        d          }|t          |           k     r*t           j                            ||                   | |<   d S d S r   )r   r%   r   rL   hfloorr   s      r   simple_hfloorr     r   r   c                     t          j        d          }|t          |           k     r*t           j                            ||                   | |<   d S d S r   )r   r%   r   rL   hrcpr   s      r   simple_hrcpr     r   r   c                     t          j        d          }|t          |           k     r*t           j                            ||                   | |<   d S d S r   )r   r%   r   rL   htruncr   s      r   simple_htruncr   #  r   r   c                     t          j        d          }|t          |           k     r*t           j                            ||                   | |<   d S d S r   )r   r%   r   rL   hrintr   s      r   simple_hrintr   *  r   r   c                 4    t          j        |          | d<   d S r   )r   cbrtrq   s     r   simple_cbrtr   1  rC   r   c                 4    t          j        |          | d<   d S r   )r   brevrA   s     r   simple_brevr   5  rC   r   c                 4    t          j        |          | d<   d S r   )r   clzrA   s     r   
simple_clzr   9      Xa[[CFFFr   c                 4    t          j        |          | d<   d S r   )r   ffsrA   s     r   
simple_ffsr   =  r   r   c                 *    t          |          | d<   d S r   roundrA   s     r   simple_roundr   A  s    1XXCFFFr   c                 ,    t          ||          | d<   d S r   r   )r   r7   ndigitss      r   simple_round_tor   E  s    1gCFFFr   c                     t          j        d          }| |         dk    r|dz  dk    r||         | |<   d S d| |<   d S d| |<   d S )Nr      r)   r         r$   )rG   rH   r7   r   s       r   branching_with_ifsr   I  sT    	!Ataxxq5A::Q4AaDDDAaDDD!r   c                     t          j        d          }t          j        |dz  dk    ||         d          }t          j        | |         dk    |d          | |<   d S )Nr   r)   r   r   r   r   )r   r%   selp)rG   rH   r7   r   inners        r   branching_with_selpsr   U  sQ    	!AIa!eqj!A$++E9QqTAXua((AaDDDr   c                 L    t          j        d          }t           j        | |<   d S r   )r   r%   laneidr   s     r   simple_laneidr   \  s    	!A[CFFFr   c                 $    t           j        | d<   d S r   )r   warpsize)r   s    r   simple_warpsizer  a  s    ]CFFFr   c                 .    t          j        |            d S r   r$   r   s    r   nonliteral_gridr  e  s    IaLLLLLr   c                 .    t          j        |            d S r   )r   r.   r  s    r   nonliteral_gridsizer  i  s    M!r   c                       e Zd Z fdZd Zd Zd Z ed          d             Z ed          d             Z	d Z
d	 Zd
 Z ed          d             Z ed          d             Zd Zd Zd Zd Zd Zd Zd Zd Zed             Zed             Z ed          d             Zed             Zed             Z ed          d             Zed             Zed             Z ed          d              Z ed!             Z!ed"             Z" ed          d#             Z#ed$             Z$ed%             Z%ed&             Z&ed'             Z' ed          d(             Z(ed)             Z)ed*             Z* ed          d+             Z+ed,             Z,ed-             Z-ed.             Z.ed/             Z/ed0             Z0ed1             Z1d2 Z2d3 Z3d4 Z4 ed5          d6             Z5d7 Z6d8 Z7d9 Z8d: Z9 ed5          d;             Z:d< Z;d= Z<d> Z=d? Z> ed5          d@             Z?dA Z@dB ZAdC ZBdD ZCdE ZD edF          dG             ZEdH ZFdI ZG edF          dJ             ZHdK ZI xZJS )LTestCudaIntrinsicc                     t                                                       t          j                            d           d S r   )supersetUpnprandomseed)self	__class__s    r   r  zTestCudaIntrinsic.setUpn  s.    
	qr   c                      t          j        d          t                    }t          j        dt          j                  } |d         |           |                     |d         dk               d S )Nvoid(int32[:])r   r   r   r   r   )r   jitr   r  onesint32
assertTruer  compiledr   s      r   test_simple_threadidxz'TestCudaIntrinsic.test_simple_threadidxr  sg    -48,--.>??garx(((sA!$$$$$r   c                 J    t          j        d          t                    }d}t          j        |t          j                  }t          j        |t          j                  } |d|f         |           |                     t          j        ||k                         d S )Nr  
   r  r   )	r   r  r   r  r  r  aranger  all)r  r  Nr   exps        r   test_fill_threadidxz%TestCudaIntrinsic.test_fill_threadidxx  s    -48,--n==garx(((i***Asscz**+++++r   c                     d\  fd}fd} |            } |            }|                      t          j        ||k                         d S )N)r         c                       t          j        d          t                    } t          j        ft          j                  } | dff         |           |S )Nzvoid(int32[:,:,::1])r  r   )r   r  r"   r  zerosr  r  r   XYZs     r   c_contigousz<TestCudaIntrinsic.test_fill3d_threadidx.<locals>.c_contigous  s^    7tx 6778HIIH(Aq!9BH555C"HQAq	\"3'''Jr   c                       t          j        d          t                    } t          j        t          j        ft          j                            } | dff         |           |S )Nzvoid(int32[::1,:,:])r  r   )r   r  r"   r  asfortranarrayr'  r  r(  s     r   f_contigousz<TestCudaIntrinsic.test_fill3d_threadidx.<locals>.f_contigous  si    7tx 6778HIIH#BHaAYbh$G$G$GHHC"HQAq	\"3'''Jr   )r  r  r  )r  r,  r/  c_resf_resr)  r*  r+  s        @@@r   test_fill3d_threadidxz'TestCudaIntrinsic.test_fill3d_threadidx  s    1a	 	 	 	 	 	 		 	 	 	 	 	 	 u~../////r   zCudasim does not check typesc                     |                      t          d          5   t          j        d          t                     d d d            d S # 1 swxY w Y   d S NRequireLiteralValuezvoid(int32))assertRaisesRegexr   r   r  r  r  s    r   test_nonliteral_grid_errorz,TestCudaIntrinsic.test_nonliteral_grid_error  s    ##K1FGG 	5 	5#DH]##O444	5 	5 	5 	5 	5 	5 	5 	5 	5 	5 	5 	5 	5 	5 	5 	5 	5 	5   #AAAc                     |                      t          d          5   t          j        d          t                     d d d            d S # 1 swxY w Y   d S r4  )r6  r   r   r  r  r7  s    r   test_nonliteral_gridsize_errorz0TestCudaIntrinsic.test_nonliteral_gridsize_error  s    ##K1FGG 	9 	9#DH]##$7888	9 	9 	9 	9 	9 	9 	9 	9 	9 	9 	9 	9 	9 	9 	9 	9 	9 	9r9  c                 >    t          j        d          t                    }d\  }}||z  }t          j        |t          j                  } |||f         |           |                     t          j        |t          j        |          k                         d S )Nvoid(int32[::1])r      r  )	r   r  r&   r  emptyr  r  r  r  )r  r  ntidnctaidnelemr   s         r   test_simple_grid1dz$TestCudaIntrinsic.test_simple_grid1d  s    /48.//>>fvhuBH---s###sbi&6&667788888r   c                     t          j        d          t                    }d}d}|d         |d         z  |d         |d         z  f}t          j        |t          j                  }|                                } |||f         |           t          |j        d                   D ])}t          |j        d                   D ]}||z   |||f<   *| 	                    t          j
        ||k                         d S Nzvoid(int32[:,::1])r   r   r$  r%  r   r   r  )r   r  r*   r  r@  r  copyr6   r5   r  r  )	r  r  rA  rB  r5   r   r!  r   r    s	            r   test_simple_grid2dz$TestCudaIntrinsic.test_simple_grid2d  s    148011-@@a6!9$d1gq	&9:huBH---hhjjs###sy|$$ 	" 	"A39Q<(( " "EAqD		" 	scz**+++++r   c                      t          j        d          t                    }d\  }}t          j        dt          j                  } |||f         |           |                     |d         ||z             d S )Nr=  r>  r   r  r   )r   r  r/   r  r'  r  assertEqualr  r  rA  rB  r   s        r   test_simple_gridsize1dz(TestCudaIntrinsic.test_simple_gridsize1d  sy    /48.//0ABBfhq)))s###Q$/////r   zRequires too many threadsc                 D   t           j        d             }t          j        dt          j                  }t          j        dt          j                  } |d         ||           |                     |d         d           |                     |d         d           d S )Nc                 6   t          j        d          }t           j        j        t           j        j        z  t           j        j        z   }t          j        d          }t           j        j        t           j        j        z  }||k    rd| d<   ||k    rd|d<   d S d S r,   )r   r%   blockIdxr   r4   r   r.   r3   )
grid_errorgridsize_errori1i2gs1gs2s         r   fz,TestCudaIntrinsic.test_issue_9229.<locals>.f  s~    1B4=?2T^5EEB-""C-/DLN2CRxx !
1czz$%q!!! zr   r   r  )i Q   r   )r   r  r  r'  uint64rL  )r  rX  rR  rS  s       r   test_issue_9229z!TestCudaIntrinsic.test_issue_9229  s     
	& 	& 
	& Xary111
!29555 	-^444A****A.....r   zTests PTX emissionc           	         t           d d          t           t           d d          f} t          j        |          t                    } t          j        |          t                    }d}d}t          j        ddt
          j                   }|                                }d|d d<   t          j        |t
          j                   } ||df         |||           |	                    |          }	| 
                    d	t          t          j        d
|	                               t
          j                            ||d           t          j        |t
          j                   } ||df         |||           |	                    |          }	| 
                    dt          t          j        d
|	                               t
          j                            ||d           d S )N    r%     )r5   
fill_valuer   r   r$  r  r   r)   z	\s+bra\s+	branching)err_msgr   r   )r   r   r  r   r   r  fullrI  r  inspect_asmrL  r   refindalltestingassert_array_equal)
r  sigcu_branching_with_ifscu_branching_with_selpsnrH   r7   expectedrG   ptxs
             r   	test_selpzTestCudaIntrinsic.test_selp  s   QQQxaaa) -.@ A A"/$(3--0D"E"EG"28<<<6688!Iarx(((#ad#Aq!,,,#//44C
< = =>>???

%%a;%GGGIarx(((%1%aA...%11#66C
< = =>>???

%%a6%BBBBBr   c                 d    t          j        d          t                    }d}d}t          j        dt          j                  } |||f         |           |                     |d         |d         |d         z             |                     |d         |d         |d         z             d S )Nr=  rG  rH  r)   r  r   r   )r   r  r1   r  r'  r  rL  rM  s        r   test_simple_gridsize2dz(TestCudaIntrinsic.test_simple_gridsize2d  s    /48.//0ABBhq)))s###QT!W!4555QT!W!455555r   c           	      Z    t          j        d          t                    }d}d}|d         |d         z  |d         |d         z  f}t          j        |t          j                  } |||f         |           |\  }}|j        \  }}	t          t          |d                   t          |d                             D ]i\  }
}||
z   ||z   }}t          ||	|          D ]H}t          |||          D ]4}| 	                    |||f         ||z   k    |||f         ||z   f           5Ijd S rF  )
r   r  r>   r  r@  r  r5   zipr6   r  )r  r  rA  rB  r5   r   r:   r;   r<   r=   r   r    r8   r9   r   r   s                   r   test_intrinsic_forloop_stepz-TestCudaIntrinsic.test_intrinsic_forloop_step  s]   1480112HIIa6!9$d1gq	&9:huBH---s###u	d1gd1g77 	L 	LDAq"QY	FF65%00 L Lvvu55 L LAOOC1IQ$6QTAE8JKKKKLL	L 	Lr   c                     t           j        d             }t          j        dt          j                                      ddd          } |d         |           t          j                            |d           d S )Nc                     t          j        d          \  }}}t          j        d          \  }}}||z  |z  | |||f<   d S Nr   r-   )outr   r   r   rG   rH   r7   s          r   fooz*TestCudaIntrinsic.test_3dgrid.<locals>.foo	  sE    illGAq!mA&&GAq!q519C1aLLLr   i  r  	   )r   r   r   rz  )r   r  r  r'  r  reshaperf  assert_equal)r  rx  arrs      r   test_3dgridzTestCudaIntrinsic.test_3dgrid  s{    		% 	% 
	%
 hvRX...66q!Q??! !#&&&

V,,,,,r   c                 "   t           j        d             }d\  }}}t          j        ||z  |z  t          j                                      |||          } |d         |           |                     t          j        |                     d S )Nc                    t          j        d          \  }}}t          j        d          \  }}}|t           j        j        t           j        j        t           j        j        z  z   k    oq|t           j        j        t           j        j        t           j        j        z  z   k    o8|t           j        j        t           j        j        t           j        j        z  z   k    }|t           j        j        t           j	        j        z  k    oM|t           j        j        t           j	        j        z  k    o&|t           j        j        t           j	        j        z  k    }|o|| |||f<   d S rv  )
r   r%   r.   r   r   rQ  r4   r   r   r3   )	rw  r   r   r   rG   rH   r7   grid_is_rightgridsize_is_rights	            r   rx  z,TestCudaIntrinsic.test_3dgrid_2.<locals>.foo  s   illGAq!mA&&GAq!T^%$-/(III JT^%$-/(IIIJT^%$-/(III 
 "#dmo&F!F "G!"dmo&F!F"G!"dmo&F!F  )>->C1aLLLr   )   r%     r  ))r   r   r)   )r   r)   r   )r   r  r  r'  bool_r{  r  r  )r  rx  r   r   r   r}  s         r   test_3dgrid_2zTestCudaIntrinsic.test_3dgrid_2  s    		? 	? 
	? (1ahA	"(333;;Aq!DD! !#&&&s$$$$$r   c                      t          j        d          t                    }t          j        dt          j                  } |d         |d           |                     |d         d           d S )Nvoid(int32[:], uint32)r   r  r     r   r   r   r  rB   r  r'  r  rL  r  s      r   test_popc_u4zTestCudaIntrinsic.test_popc_u4)  sh    548455kBBhq)))sD!!!Q#####r   c                      t          j        d          t                    }t          j        dt          j                  } |d         |d           |                     |d         d           d S )Nzvoid(int32[:], uint64)r   r  r  l        @ r   r   r  r  s      r   test_popc_u8zTestCudaIntrinsic.test_popc_u8/  sh    548455kBBhq)))sN+++Q#####r   c                      t          j        d          t                    }t          j        dt          j                  } |d         |ddd           t          j                            |d         d	           d S )
Nzvoid(f4[:], f4, f4, f4)r   r  r         @      @      @r   r  )r   r  rI   r  r'  float32rf  assert_allcloser  s      r   test_fma_f4zTestCudaIntrinsic.test_fma_f45  n    648566zBBhq
+++sBB'''

""3q6955555r   c                      t          j        d          t                    }t          j        dt          j                  } |d         |ddd           t          j                            |d         d	           d S )
Nzvoid(f8[:], f8, f8, f8)r   r  r  r  r  r  r   r  )r   r  rI   r  r'  float64rf  r  r  s      r   test_fma_f8zTestCudaIntrinsic.test_fma_f8;  r  r   c                     t          j        d          t                    }t          j        dt          j                  }t          j        dgt          j                  }t          j        dgt          j                  } |d         |||           t          j                            |d         ||z              d S Nvoid(f2[:], f2[:], f2[:])r   r  r  r  r  r   )	r   r  rO   r  r'  float16arrayrf  r  r  r  r   arg1arg2s        r   	test_haddzTestCudaIntrinsic.test_haddA      848788EEhq
+++xBJ///xBJ///sD$'''

""3q64$;77777r   c                 V    t          j        d          t                    }t          j        dt          j                  }t          j        d          }t          j        d          } |d         |||           ||z   }t          j                            |d         |           d S )Nvoid(f2[:], f2, f2)r   r  JM!	@r  r  r   )r   r  rR   r  r'  r  rf  r  r  r  r   r  r  refs         r   test_hadd_scalarz"TestCudaIntrinsic.test_hadd_scalarJ  s    2481223EFFhq
+++z)$$z"~~sD$'''Tk

""3q63/////r   z(Compilation unsupported in the simulatorc                     t           d d          t           t           f}t          t          |d          \  }}|                     d|           d S )Nr$  r   cczadd.f16)r   r   rR   assertInr  argsrm  _s       r   test_hadd_ptxzTestCudaIntrinsic.test_hadd_ptxT  H    111r2/&AAAQi%%%%%r   c                     t          j        d          t                    }t          j        dt          j                  }t          j        dgt          j                  }t          j        dgt          j                  }t          j        dgt          j                  } |d         ||||           t          j                            |d         ||z  |z              d S )	Nz void(f2[:], f2[:], f2[:], f2[:])r   r  r  r  r  r  r   )	r   r  rW   r  r'  r  r  rf  r  )r  r  r   r  r  arg3s         r   	test_hfmazTestCudaIntrinsic.test_hfmaZ  s    ?48>??LLhq
+++xBJ///xBJ///xBJ///sD$---

""3q64$;+=>>>>>r   c                     t          j        d          t                    }t          j        dt          j                  }t          j        d          }t          j        d          }t          j        d          } |d         ||||           ||z  |z   }t          j                            |d         |           d S )	Nzvoid(f2[:], f2, f2, f2)r   r  r  r  r  r  r   )r   r  rY   r  r'  r  rf  r  )r  r  r   r  r  r  r  s          r   test_hfma_scalarz"TestCudaIntrinsic.test_hfma_scalard  s    6485667IJJhq
+++z"~~z"~~z"~~sD$---TkD 

""3q63/////r   c                     t           d d          t           t           t           f}t          t          |d          \  }}|                     d|           d S )Nr  r  z
fma.rn.f16)r   r   rY   r  r  s       r   test_hfma_ptxzTestCudaIntrinsic.test_hfma_ptxo  sK    111r2r"/&AAAQlC(((((r   c                     t          j        d          t                    }t          j        dt          j                  }t          j        dgt          j                  }t          j        dgt          j                  } |d         |||           t          j                            |d         ||z
             d S r  )	r   r  r]   r  r'  r  r  rf  r  r  s        r   	test_hsubzTestCudaIntrinsic.test_hsubu  r  r   c                 V    t          j        d          t                    }t          j        dt          j                  }t          j        d          }t          j        d          } |d         |||           ||z
  }t          j                            |d         |           d S Nr  r   r  r  gQ?r  r   )r   r  r_   r  r'  r  rf  r  r  s         r   test_hsub_scalarz"TestCudaIntrinsic.test_hsub_scalar~      2481223EFFhq
+++z)$$z$sD$'''Tk

""3q63/////r   c                     t           d d          t           t           f}t          t          |d          \  }}|                     d|           d S )Nr  r  zsub.f16)r   r   r_   r  r  s       r   test_hsub_ptxzTestCudaIntrinsic.test_hsub_ptx  r  r   c                     t          j                    t                    }t          j        dt          j                  }t          j        dgt          j                  }t          j        dgt          j                  } |d         |||           t          j                            |d         ||z             d S )Nr   r  r  r  r  r   )	r   r  rc   r  r'  r  r  rf  r  r  s        r   	test_hmulzTestCudaIntrinsic.test_hmul  s    48::k**hq
+++xBJ///xBJ///sD$'''

""3q64$;77777r   c                 V    t          j        d          t                    }t          j        dt          j                  }t          j        d          }t          j        d          } |d         |||           ||z  }t          j                            |d         |           d S r  )r   r  re   r  r'  r  rf  r  r  s         r   test_hmul_scalarz"TestCudaIntrinsic.test_hmul_scalar  r  r   c                     t           d d          t           t           f}t          t          |d          \  }}|                     d|           d S )Nr  r  zmul.f16)r   r   re   r  r  s       r   test_hmul_ptxzTestCudaIntrinsic.test_hmul_ptx  r  r   c                 V    t          j        d          t                    }t          j        dt          j                  }t          j        d          }t          j        d          } |d         |||           ||z  }t          j                            |d         |           d S r  )r   r  rh   r  r'  r  rf  r  r  s         r   test_hdiv_scalarz"TestCudaIntrinsic.test_hdiv_scalar  s    2481223EFFhq
+++z)$$z$sD$'''Tk

""3q63/////r   c                     t          j        d          t                    }t          j                            ddd                              t          j                  }t          j                            ddd                              t          j                  }t          j        |t          j                  } |	                    |j
                  |||           ||z  }t          j                            ||           d S )Nr  i    i  rj   r  )r   r  rm   r  r  randintastyper  
zeros_likeforallrj   rf  r  )r  r  arry1arry2r   r  s         r   	test_hdivzTestCudaIntrinsic.test_hdiv  s    8487889KLL	!!&%c!::AA"*MM	!!&%c!::AA"*MMmE444!!!#ue444em

""3,,,,,r   c                 >    t          j        d          t                    }t          j        dt          j                  }t          j        dgt          j                  } |d         ||           t          j                            |d         |            d S )Nvoid(f2[:], f2[:])r   r  r  r  r   )	r   r  rr   r  r'  r  r  rf  r  r  r  r   r  s       r   	test_hnegzTestCudaIntrinsic.test_hneg  s    148011+>>hq
+++xBJ///sD!!!

""3q6D511111r   c                 (    t          j        d          t                    }t          j        dt          j                  }t          j        d          } |d         ||           | }t          j                            |d         |           d S )Nvoid(f2[:], f2)r   r  r  r  r   )r   r  ru   r  r'  r  rf  r  r  r  r   r  r  s        r   test_hneg_scalarz"TestCudaIntrinsic.test_hneg_scalar  s    .48-../ABBhq
+++z)$$sD!!!e

""3q63/////r   c                     t           d d          t           f}t          t          |d          \  }}|                     d|           d S )Nr  r  zneg.f16)r   r   ru   r  r  s       r   test_hneg_ptxzTestCudaIntrinsic.test_hneg_ptx  F    111r{/&AAAQi%%%%%r   c                 T    t          j                    t                    }t          j        dt          j                  }t          j        dgt          j                  } |d         ||           t          j                            |d         t          |                     d S )Nr   r        r  r   )
r   r  rz   r  r'  r  r  rf  r  absr  s       r   	test_habszTestCudaIntrinsic.test_habs  s    48::k**hq
+++xRZ000sD!!!

""3q63t9955555r   c                 @    t          j        d          t                    }t          j        dt          j                  }t          j        d          } |d         ||           t          |          }t          j                            |d         |           d S )Nr  r   r  gJM!	r  r   )	r   r  r|   r  r'  r  r  rf  r  r  s        r   test_habs_scalarz"TestCudaIntrinsic.test_habs_scalar  s    .48-../ABBhq
+++z*%%sD!!!$ii

""3q63/////r   c                     t           d d          t           f}t          t          |d          \  }}|                     d|           d S )Nr  r  zabs.f16)r   r   r|   r  r  s       r   test_habs_ptxzTestCudaIntrinsic.test_habs_ptx  r  r   c                    t           t          t          t          t          t
          t          t          t          t          t          t          f}t          t          f}t          j        t          j        t          j        t          j        t          j        t          j        t          j        t          j        t          j        t          j        t          j        t4          f}t          j        t          j        f}d}t          j                            d           t          j                            dd|                               t          j!                  }t          j"        |          }tG          ||          D ]\  }}	| $                    |	          5   tK          j&        d          |          } |d|f         ||            |	|t          j!                  }
t          j'        (                    ||
           d d d            n# 1 swxY w Y   t          j                            dd|                               t          j!                  }tG          ||          D ]\  }}	| $                    |	          5   tK          j&        d          |          } |d|f         ||            |	|t          j!                  }
t          j'        (                    ||
           d d d            n# 1 swxY w Y   d S )	Nr]  r   r  r  fnr  r  r  ))r   r   r   r   r   r   r   r   r   r   r   r   r   r   r  sincosloglog2log10sqrtceilfloor
reciprocaltruncrintr   r!  exp2r  r  r  r  r  r  rr  subTestr   r  rf  r  )r  kernelsexp_kernelsexpected_functionsexpected_exp_functionsr   r   r   kernelr  rl  x2s               r   test_fp16_intrinsics_commonz-TestCudaIntrinsic.test_fp16_intrinsics_common  s   m}| 	"
 #L1 fbf fbgrx grw mRXrw*	,
 #%&"'!2 
	qIaQ//66rzBBM!g'9:: 	8 	8JFB$$ 8 87"677??qsAq!!!2arz222
**1h777	8 8 8 8 8 8 8 8 8 8 8 8 8 8 8 Yq"1--44RZ@@k+ABB 	8 	8JFB$$ 8 87"677??qsAr"""2b
333
**1h777	8 8 8 8 8 8 8 8 8 8 8 8 8 8 8	8 	8s&   A)HH
	H
	<A)K11K5	8K5	c                    t          j                    d             }d}t          j                            d           t          j                            |                              t          j                  }t          j        |          } |d|f         ||           t          j	        
                    |d|z             d S )Nc                     t          j        d          }|t          |           k     r*t           j                            ||                   | |<   d S d S r   )r   r%   r   rL   hexp10r   s      r   hexp10_vectorsz5TestCudaIntrinsic.test_hexp10.<locals>.hexp10_vectors  sF    	!A3q66zzy''!--! zr   r]  r   r  )r   r  r  r  r  randr  r  r  rf  r  )r  r  r   r   r   s        r   test_hexp10zTestCudaIntrinsic.test_hexp10  s    		. 	. 
	. 
	qINN1$$RZ00M! 	q!tQ"""

""1bAg.....r   c                    t           t          t          t          t          t
          f}t          j        t          j        t          j	        t          j
        t          j        t          j        f}t          ||          D ]~\  }}|                     |          5   t          j        d          |          }t#          j        dt"          j                  }t#          j        dt"          j                  }t#          j        d          }t#          j        d          }	t#          j        d          }
 |d         ||	|	            ||	|	          }|                     ||d	                     |d         ||	|
            ||	|
          }|                     ||d	                     |d         ||	|            ||	|          }|                     ||d	                    d d d            n# 1 swxY w Y   d S )
N)opzvoid(b1[:], f2, f2)r   r  r)   r   r   r  r   )r   r   r   r   r   r   operatoreqnegegtleltrr  r  r   r  r  r'  r  r  rL  )r  fnsopsr  r  r  rl  gotr  r  arg4s              r   test_fp16_comparisonz&TestCudaIntrinsic.test_fp16_comparison!  s    "35F "35FH{HK{HK6 #smm 	3 	3FB$$ 3 38"788<<8ARX666hq111z!}}z!}}z!}} tS$---2dD>>  3q6222 tS$---2dD>>  3q6222 tS$---2dD>>  3q6222-3 3 3 3 3 3 3 3 3 3 3 3 3 3 3	3 	3s   EG..G2	5G2	c                    t           t          t          t          t          f}|D ]}|                     |          5   t          j        d          |          }t          j	        dt          j
                  }t          j        d          }t          j        d          }t          j        d          } |d         ||||           |                     |d	                    d d d            n# 1 swxY w Y   d S )
Nr  zvoid(b1[:], f2, f2, f2)r   r  r  r  r  r  r   )r   r   r   r   r   r  r   r  r  r'  r  r  r  )r  	functionsr  r  r   r  r  r  s           r   !test_multiple_float16_comparisonsz3TestCudaIntrinsic.test_multiple_float16_comparisonsA  s$   )))))	+	
  	( 	(B$$ ( (>48$=>>rBBhq111z"~~z"~~z"~~sD$555A'''( ( ( ( ( ( ( ( ( ( ( ( ( ( (	( 	(s   B)C00C4	7C4	c                     t          j        d          t                    }t          j        dt          j                  }t          j        d          }t          j        d          } |d         |||           t          j                            |d         |           t          j        d          } |d         |||           t          j                            |d         |           d S 	Nr  r   r  r  r  r  r   g      @)r   r  r   r  r'  r  rf  r  r  s        r   	test_hmaxzTestCudaIntrinsic.test_hmaxR      2481223EFFhq
+++z"~~z"~~sD$'''

""3q64000z"~~sD$'''

""3q6400000r   c                     t          j        d          t                    }t          j        dt          j                  }t          j        d          }t          j        d          } |d         |||           t          j                            |d         |           t          j        d          } |d         |||           t          j                            |d         |           d S r  )r   r  r   r  r'  r  rf  r  r  s        r   	test_hminzTestCudaIntrinsic.test_hmin^  r  r   c                     t          j        d          t                    }t          j        dt          j                  }d} |d         ||           t          j                            |d         |dz             d S )Nzvoid(float32[:], float32)r   r  r  r  r   UUUUUU?)r   r  r   r  r'  r  rf  r  r  r  r   cbrt_args       r   test_cbrt_f32zTestCudaIntrinsic.test_cbrt_f32j  t    848788EEhq
+++sH%%%

""3q68+>?????r   c                     t          j        d          t                    }t          j        dt          j                  }d} |d         ||           t          j                            |d         |dz             d S )Nzvoid(float64[:], float64)r   r  g      @r  r   r  )r   r  r   r  r'  r  rf  r  r  s       r   test_cbrt_f64zTestCudaIntrinsic.test_cbrt_f64q  r   r   c                      t          j        d          t                    }t          j        dt          j                  } |d         |d           |                     |d         d           d S )Nzvoid(uint32[:], uint32)r   r  r  i0  r   i  )r   r  r   r  r'  uint32rL  r  s      r   test_brev_u4zTestCudaIntrinsic.test_brev_u4x  sh    648566{CChq	***sJ'''Q,,,,,r   z.only get given a Python "int", assumes 32 bitsc                      t          j        d          t                    }t          j        dt          j                  } |d         |d           |                     |d         d           d S )Nzvoid(uint64[:], uint64)r   r  r  l   0  C r   l       `x)r   r  r   r  r'  rZ  rL  r  s      r   test_brev_u8zTestCudaIntrinsic.test_brev_u8~  sj    648566{CChq	***s.///Q!344444r   c                      t          j        d          t                    }t          j        dt          j                  } |d         |d           |                     |d         d           d S )Nvoid(int32[:], int32)r   r  r     r      r   r  r   r  r'  r  rL  r  s      r   test_clz_i4zTestCudaIntrinsic.test_clz_i4  sh    448344Z@@hq)))sJ'''Q$$$$$r   c                      t          j        d          t                    }t          j        dt          j                  } |d         |d           |                     |d         d           dS )	a  
        Although the CUDA Math API
        (http://docs.nvidia.com/cuda/cuda-math-api/group__CUDA__MATH__INTRINSIC__INT.html)
        only says int32 & int64 arguments are supported in C code, the LLVM
        IR input supports i8, i16, i32 & i64 (LLVM doesn't have a concept of
        unsigned integers, just unsigned operations on integers).
        http://docs.nvidia.com/cuda/nvvm-ir-spec/index.html#bit-manipulations-intrinics
        r  r   r  r  r*  r   r+  Nr,  r  s      r   test_clz_u4zTestCudaIntrinsic.test_clz_u4  sj     648455jAAhq)))sJ'''Q$$$$$r   c                      t          j        d          t                    }t          j        dt          j                  } |d         |d           |                     |d         d           d S Nr)  r   r  r  l    r   r,  r  s      r   test_clz_i4_1sz TestCudaIntrinsic.test_clz_i4_1s  h    448344Z@@hq)))sJ'''Q#####r   c                      t          j        d          t                    }t          j        dt          j                  } |d         |d           |                     |d         dd           d S )Nr)  r   r  r  r   r]  CUDA semanticsr,  r  s      r   test_clz_i4_0sz TestCudaIntrinsic.test_clz_i4_0s  sk    448344Z@@hq)))sC   Q%566666r   c                      t          j        d          t                    }t          j        dt          j                  } |d         |d           |                     |d         d           d S )Nvoid(int32[:], int64)r   r  r     r   /   r,  r  s      r   test_clz_i8zTestCudaIntrinsic.test_clz_i8  si    448344Z@@hq)))s-...Q$$$$$r   c                 B    t          j        d          t                    }t          j        dt          j                  } |d         |d           |                     |d         d            |d         |d           |                     |d         d	           d S )
Nr)  r   r  r  r*  r              r]  r   r  r   r  r'  r  rL  r  s      r   test_ffs_i4zTestCudaIntrinsic.test_ffs_i4  s    448344Z@@hq)))sJ'''Q$$$sJ'''Q$$$$$r   c                 B    t          j        d          t                    }t          j        dt          j                  } |d         |d           |                     |d         d            |d         |d           |                     |d         d	           d S )
Nr  r   r  r  r*  r   r=  r>  r]  r?  r  s      r   test_ffs_u4zTestCudaIntrinsic.test_ffs_u4  s    548455jAAhq)))sJ'''Q$$$sJ'''Q$$$$$r   c                      t          j        d          t                    }t          j        dt          j                  } |d         |d           |                     |d         d           d S r1  r?  r  s      r   test_ffs_i4_1sz TestCudaIntrinsic.test_ffs_i4_1s  r3  r   c                      t          j        d          t                    }t          j        dt          j                  } |d         |d           |                     |d         d           d S )Nr)  r   r  r  r   r?  r  s      r   test_ffs_i4_0sz TestCudaIntrinsic.test_ffs_i4_0s  sh    448344Z@@hq)))sC   Q#####r   c                 B    t          j        d          t                    }t          j        dt          j                  } |d         |d           |                     |d         d            |d         |d           |                     |d         d	           d S )
Nr8  r   r  r  r9  r   r^  l        !   r?  r  s      r   test_ffs_i8zTestCudaIntrinsic.test_ffs_i8  s    448344Z@@hq)))s-...Q$$$sK(((Q$$$$$r   c                 |    t          j        d          t                    }d}t          j        |dz  t          j                  }t          j        t          j        dt          j                  |          } |d|dz  f         |           |                     t          j	        ||k                         d S )Nr  r)   r]  r  r   )
r   r  r   r  r'  r  tiler  r  r  )r  r  countr   r!  s        r   test_simple_laneidz$TestCudaIntrinsic.test_simple_laneid  s    -48,--m<<hurz222gbi"(333U;;EBJ$$$scz**+++++r   c                      t          j        d          t                    }t          j        dt          j                  } |d         |           |                     |d         dd           d S )Nr  r   r  r  r   r]  r5  )r   r  r  r  r'  r  rL  r  s      r   test_simple_warpsizez&TestCudaIntrinsic.test_simple_warpsize  si    -48,--o>>hq)))sQ%566666r   c                 
    t          j        d          t                    }t          j        dt          j                  }dD ]=} |d         ||           |                     |d         t          |                     >d S )Nzvoid(int64[:], float32)r   r  r  g      g      g      g      ?g      @g      @g      @r  r   r   r  r   r  r'  r   rL  r   r  r  r   r   s       r   test_round_f4zTestCudaIntrinsic.test_round_f4      648566|DDhq)))@ 	/ 	/AHTN3"""SVU1XX....	/ 	/r   c                 
    t          j        d          t                    }t          j        dt          j                  }dD ]=} |d         ||           |                     |d         t          |                     >d S )Nzvoid(int64[:], float64)r   r  rQ  r  r   rR  rS  s       r   test_round_f8zTestCudaIntrinsic.test_round_f8  rU  r   c           	          t          j        d          t                    }t          j        dt          j                  }t          j                            d           t          j                            d                              t          j                  }t          j	        |t          j
        t          j        t          j         t          j        g          f           d}t          j        ||          D ]s\  }}|                     ||          5   |d         |||           |                     |d	         t#          ||          d
           d d d            n# 1 swxY w Y   td S )N void(float32[:], float32, int32)r   r  {   r]  )r   r   r)   r   r   r$  r   valr   r  r   singleprec)r   r  r   r  r'  r  r  r  r  concatenater  infnan	itertoolsproductr  assertPreciseEqualr   r  r  r   valsdigitsra  r   s          r   test_round_to_f4z"TestCudaIntrinsic.test_round_to_f4  s   ?48>??PPhq
+++
	sy##**2:66
bh'@AABCCC
 &-dF;; 	7 	7LC#w77 7 7sC111''Ac70C0C-5 ( 7 7 77 7 7 7 7 7 7 7 7 7 7 7 7 7 7	7 	7s   A E''E+	.E+	z$Overflow behavior differs on CPythonc                 2    t          j        d          t                    }t          j        dt          j                  }t          j        t          j                  j        }d} |d         |||           |                     |d         |           d S )NrY  r   r  i,  r  r   )	r   r  r   r  r'  r  finfomaxrL  r  r  r   ra  r   s        r   test_round_to_f4_overflowz+TestCudaIntrinsic.test_round_to_f4_overflow	  s     @48>??PPhq
+++hrz""& sC)))Q%%%%%r   c                     t          j        d          t                    }t          j        dt          j                  }d}d} |d         |||           |                     |d         t          ||          d	           d S )
NrY  r   r  gQ?r   r  r   rb  rc  )r   r  r   r  r'  r  rj  r   rr  s        r   test_round_to_f4_halfwayz*TestCudaIntrinsic.test_round_to_f4_halfway  s    ?48>??PPhq
+++ sC)))Ac7(;(;(KKKKKr   c           	          t          j        d          t                    }t          j        dt          j                  }t          j                            d           t          j                            d          }t          j        |t          j	        t          j
        t          j
         t          j        g          f           d}t          j        ||          D ]s\  }}|                     ||          5   |d         |||           |                     |d	         t!          ||          d
           d d d            n# 1 swxY w Y   td}d}|                     ||          5   |d         |||           |                     |d	         t!          ||          d           d d d            d S # 1 swxY w Y   d S )N void(float64[:], float64, int32)r   r  rZ  r]  )r[  r\  r]  r^  r_  r   r   r)   r   r   r$  r`  r  r   exactrc  g`8p=<   double)r   r  r   r  r'  r  r  r  re  r  rf  rg  rh  ri  r  rj  r   rk  s          r   test_round_to_f8z"TestCudaIntrinsic.test_round_to_f8!  s4   ?48>??PPhq
+++
	sy##
bh'@AABCCC7%-dF;; 	6 	6LC#w77 6 6sC111''Ac70C0C-4 ( 6 6 66 6 6 6 6 6 6 6 6 6 6 6 6 6 6 +\\c7\33 	3 	3HTN3W---##CFE#w,?,?)1 $ 3 3 3	3 	3 	3 	3 	3 	3 	3 	3 	3 	3 	3 	3 	3 	3 	3 	3 	3 	3s&   >A E

E	E	1A F>>GGc                 2    t          j        d          t                    }t          j        dt          j                  }t          j        t          j                  j        }d} |d         |||           |                     |d         |           d S )Nrw  r   r  r  r  r   )	r   r  r   r  r'  r  rp  rq  rL  rr  s        r   test_round_to_f8_overflowz+TestCudaIntrinsic.test_round_to_f8_overflow8  s     @48>??PPhq
+++hrz""& sC)))Q%%%%%r   c                     t          j        d          t                    }t          j        dt          j                  }d}d} |d         |||           |                     |d         t          ||          d	           d S )
Nrw  r   r  g\(\?r   r  r   rz  rc  )r   r  r   r  r'  r  rj  r   rr  s        r   test_round_to_f8_halfwayz*TestCudaIntrinsic.test_round_to_f8_halfwayE  s    ?48>??PPhq
+++ sC)))Ac7(;(;(KKKKKr   )K__name__
__module____qualname__r  r  r"  r2  r
   r8  r;  rD  rJ  rN  r[  rn  rp  rs  r~  r  r  r  r  r  r   r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r"  r%  r'  r-  r/  r2  r6  r;  r@  rB  rD  rF  rI  rM  rO  rT  rW  rn  rs  ru  r{  r}  r  __classcell__)r  s   @r   r  r  m  s           % % %, , ,0 0 0& _3445 5 545 _3449 9 5499 9 9, , ,0 0 0 _011/ / 21/0 _)**C C +*C06 6 6L L L"
- 
- 
-% % %*$ $ $$ $ $6 6 66 6 6 8 8 8 0 0 0 _?@@& & A@&
 ? ? ? 0 0 0 _?@@) ) A@)
 8 8 8 0 0 0 _?@@& & A@&
 8 8 8 0 0 0 _?@@& & A@&
 0 0 0 - - - 2 2 2 0 0 0 _?@@& & A@&
 6 6 6 0 0 0 _?@@& & A@&
  8  8  8D / / /$ 3 3 3> ( ( (  	1 	1 	1 	1 	1 	1@ @ @@ @ @- - - _EFF5 5 GF5% % %% % %$ $ $7 7 7 _EFF% % GF%% % %% % %$ $ $$ $ $ _EFF% % GF%, , ,7 7 7/ / // / /7 7 74 _;<<& & =<&L L L3 3 3. _;<<
& 
& =<
&	L 	L 	L 	L 	L 	L 	Lr   r  __main__)Yrh  numpyr  r  rd  numbar   r   
numba.cudar   numba.core.errorsr   numba.core.typesr   numba.cuda.testingr   r	   r
   r   r   r   r"   r&   r*   r/   r1   r>   rB   rI   rO   rR   rW   rY   r]   r_   rc   re   rh   rm   rr   ru   rz   r|   r   r   r   r   r   r   r  r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r  r  r  r  r  mainr   r   r   <module>r     s0            				         " " " " " " ) ) ) ) ) )      3 3 3 3 3 3 3 3 3 3 3 3  
  
/ / /  
  
          ( ( (" " ". . .% % %( ( (" " "( ( (" " "" " "& & &" " "  " " "  ! ! !! ! !! ! !! ! !! ! !! ! ! 
   
  1 1 1
4 4 4
4 4 4
7 7 7
7 7 7
" " "" " "$ $ $$ $ $$ $ $% % %& & &$ $ $% % %% % %& & &  % % %& & &$ $ $& & &% % %            	 	 	) ) )  
      aL aL aL aL aL aL aL aLH zHMOOOOO r   