
    J/Phm0                        d Z ddlmZ ddlZddlZddlZddlmZ ddlZ	ddl
mZ ddlmZ  G d d	e          Z G d
 d          Z G d d          Z G d de          Z G d de          Z G d de          Z ej                    Z ej                    Z ej                    Z ej                    Z ej                    Z ej                    Z ej                    Z ej                    Z ej                    Z ej                    Z ej                    Z ej                    Z  G d de          Z! G d de          Z" G d de          Z#ed             Z$dS )zf
Implements the cuda module as called from within an executing kernel
(@cuda.jit-decorated function).
    )contextmanagerN)types)numpy_support   )vector_typesc                   *    e Zd ZdZd Zd Zd Zd ZdS )Dim3z;
    Used to implement thread/block indices/dimensions
    c                 0    || _         || _        || _        d S Nxyz)selfr   r   r   s       ^/var/www/html/test/jupyter/venv/lib/python3.11/site-packages/numba/cuda/simulator/kernelapi.py__init__zDim3.__init__   s        c                 8    d| j         d| j        d| j        dS )N(, )r   r   s    r   __str__zDim3.__str__   s#     !%88r   c                 8    d| j         d| j        d| j        dS )NzDim3(r   r   r   r   s    r   __repr__zDim3.__repr__   s#     %)VVVTVVVTVVV<<r   c              #   @   K   | j         V  | j        V  | j        V  d S r   r   r   s    r   __iter__zDim3.__iter__!   s+      fffr   N)__name__
__module____qualname____doc__r   r   r   r    r   r   r	   r	      sZ           
9 9 9= = =    r   r	   c                       e Zd ZdZd ZdS )	GridGroupz+
    Used to implement the grid group.
    c                 P    t          j                                                     d S r   	threadingcurrent_threadsyncthreadsr   s    r   synczGridGroup.sync,   s%     	 ""..00000r   N)r   r   r    r!   r*   r"   r   r   r$   r$   '   s-         1 1 1 1 1r   r$   c                       e Zd ZdZd ZdS )
FakeCUDACgz!
    CUDA Cooperative Groups
    c                     t                      S r   )r$   r   s    r   	this_gridzFakeCUDACg.this_grid7   s    {{r   N)r   r   r    r!   r.   r"   r   r   r,   r,   3   s-             r   r,   c                       e Zd ZdZd ZdS )FakeCUDALocalz
    CUDA Local arrays
    c                     t          |t          j                  rt          j        |          }t          j        ||          S r   )
isinstancer   Typer   as_dtypenpempty)r   shapedtypes      r   arrayzFakeCUDALocal.array?   s8    eUZ(( 	2!*511Exu%%%r   N)r   r   r    r!   r9   r"   r   r   r0   r0   ;   s-         & & & & &r   r0   c                       e Zd ZdZd ZdS )FakeCUDAConstz
    CUDA Const arrays
    c                     |S r   r"   )r   arys     r   
array_likezFakeCUDAConst.array_likeI   s    
r   N)r   r   r    r!   r>   r"   r   r   r;   r;   E   s-             r   r;   c                       e Zd ZdZd Zd ZdS )FakeCUDAShareda  
    CUDA Shared arrays.

    Limitations: assumes that only one call to cuda.shared.array is on a line,
    and that that line is only executed once per thread. i.e.::

        a = cuda.shared.array(...); b = cuda.shared.array(...)

    will erroneously alias a and b, and::

        for i in range(10):
            sharedarrs[i] = cuda.shared.array(...)

    will alias all arrays created at that point (though it is not certain that
    this would be supported by Numba anyway).
    c                 l    i | _         || _        t          j        |t          j                  | _        d S N)r8   )_allocations_dynshared_sizer5   zerosbyte
_dynshared)r   dynshared_sizes     r   r   zFakeCUDAShared.__init___   s.    -(>AAAr   c                    t          |t          j                  rt          j        |          }|dk    r0| j        |j        z  }t          j        | j	        j
        ||          S t          j        t          j                              }|d         dd         }| j                            |          }|t          j        ||          }|| j        |<   |S )Nr   )r8   count   )r2   r   r3   r   r4   rD   itemsizer5   
frombufferrG   data	tracebackextract_stacksys	_getframerC   getr6   )r   r7   r8   rJ   stackcallerress          r   r9   zFakeCUDAShared.arrayd   s    eUZ(( 	2!*511E A:: (EN:E=!5U%PPPP
 '88r1Q3##F++;(5%((C(+Df%
r   N)r   r   r    r!   r   r9   r"   r   r   r@   r@   M   s?         "B B B
    r   r@   c                   b    e Zd Zd Zd Zd Zd Zd Zd Zd Z	d Z
d	 Zd
 Zd Zd Zd Zd ZdS )FakeCUDAAtomicc                 t    t           5  ||         }||xx         |z  cc<   d d d            n# 1 swxY w Y   |S r   )addlockr   r9   indexvalolds        r   addzFakeCUDAAtomic.add        	  	 ,C%LLLCLLL	  	  	  	  	  	  	  	  	  	  	  	  	  	  	  
   -11c                 t    t           5  ||         }||xx         |z  cc<   d d d            n# 1 swxY w Y   |S r   )sublockr\   s        r   subzFakeCUDAAtomic.sub   ra   rb   c                 t    t           5  ||         }||xx         |z  cc<   d d d            n# 1 swxY w Y   |S r   )andlockr\   s        r   and_zFakeCUDAAtomic.and_   ra   rb   c                 t    t           5  ||         }||xx         |z  cc<   d d d            n# 1 swxY w Y   |S r   )orlockr\   s        r   or_zFakeCUDAAtomic.or_   s     	  	 ,C%LLLCLLL	  	  	  	  	  	  	  	  	  	  	  	  	  	  	  
rb   c                 t    t           5  ||         }||xx         |z  cc<   d d d            n# 1 swxY w Y   |S r   )xorlockr\   s        r   xorzFakeCUDAAtomic.xor   ra   rb   c                     t           5  ||         }||k    rd||<   n||xx         dz  cc<   d d d            n# 1 swxY w Y   |S Nr   r   )inclockr\   s        r   inczFakeCUDAAtomic.inc   s     	" 	",Cczz ee!	" 	" 	" 	" 	" 	" 	" 	" 	" 	" 	" 	" 	" 	" 	" 
s   %9= =c                     t           5  ||         }|dk    s||k    r|||<   n||xx         dz  cc<   d d d            n# 1 swxY w Y   |S rp   )declockr\   s        r   deczFakeCUDAAtomic.dec   s     	" 	",CqcCii"ee!	" 	" 	" 	" 	" 	" 	" 	" 	" 	" 	" 	" 	" 	" 	" 
s   +?AAc                 ^    t           5  ||         }|||<   d d d            n# 1 swxY w Y   |S r   )exchlockr\   s        r   exchzFakeCUDAAtomic.exch   sv     	 	,CE%L	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 
s   "&&c                 z    t           5  ||         }t          ||          ||<   d d d            n# 1 swxY w Y   |S r   )maxlockmaxr\   s        r   r{   zFakeCUDAAtomic.max   ~     	) 	),CsC==E%L	) 	) 	) 	) 	) 	) 	) 	) 	) 	) 	) 	) 	) 	) 	) 
   044c                 z    t           5  ||         }t          ||          ||<   d d d            n# 1 swxY w Y   |S r   )minlockminr\   s        r   r   zFakeCUDAAtomic.min   r|   r}   c                     t           5  ||         }t          j        ||         |g          ||<   d d d            n# 1 swxY w Y   |S r   )rz   r5   nanmaxr\   s        r   r   zFakeCUDAAtomic.nanmax        	: 	:,C9eElC%899E%L	: 	: 	: 	: 	: 	: 	: 	: 	: 	: 	: 	: 	: 	: 	: 
   (<A A c                     t           5  ||         }t          j        ||         |g          ||<   d d d            n# 1 swxY w Y   |S r   )r   r5   nanminr\   s        r   r   zFakeCUDAAtomic.nanmin   r   r   c                     t           5  d|j        z  }||         }||k    r|||<   |cd d d            S # 1 swxY w Y   d S )N)r   )compare_and_swaplockndim)r   r9   r_   r^   r]   loadeds         r   compare_and_swapzFakeCUDAAtomic.compare_and_swap   s    ! 	 	5:%E5\F}}"e	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	s   488c                 n    t           5  ||         }||k    r|||<   |cd d d            S # 1 swxY w Y   d S r   )caslock)r   r9   r]   r_   r^   r   s         r   caszFakeCUDAAtomic.cas   s     	 	5\F}}"e		 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	s   *..N)r   r   r    r`   re   rh   rk   rn   rr   ru   rx   r{   r   r   r   r   r   r"   r   r   rY   rY      s                                      r   rY   c                       e Zd Zd Zd Zd Zd Zd Zd Zd Z	d Z
d	 Zd
 Zd Zd Zd Zd Zd Zd Zd Zd Zd Zd Zd Zd Zd Zd Zd Zd Zd Zd Zd Zd Z dS ) FakeCUDAFp16c                     ||z   S r   r"   r   abs      r   haddzFakeCUDAFp16.hadd       1ur   c                     ||z
  S r   r"   r   s      r   hsubzFakeCUDAFp16.hsub   r   r   c                     ||z  S r   r"   r   s      r   hmulzFakeCUDAFp16.hmul   r   r   c                     ||z  S r   r"   r   s      r   hdivzFakeCUDAFp16.hdiv   r   r   c                     ||z  |z   S r   r"   r   r   r   cs       r   hfmazFakeCUDAFp16.hfma       1uqyr   c                     | S r   r"   r   r   s     r   hnegzFakeCUDAFp16.hneg   s	    r	r   c                      t          |          S r   )absr   s     r   habszFakeCUDAFp16.habs   s    1vvr   c                 B    t          j        |t           j                  S rB   )r5   sinfloat16r   r   s     r   hsinzFakeCUDAFp16.hsin       varz****r   c                 B    t          j        |t           j                  S rB   )r5   cosr   r   s     r   hcoszFakeCUDAFp16.hcos  r   r   c                 B    t          j        |t           j                  S rB   )r5   logr   r   s     r   hlogzFakeCUDAFp16.hlog  r   r   c                 B    t          j        |t           j                  S rB   )r5   log2r   r   s     r   hlog2zFakeCUDAFp16.hlog2      wq
++++r   c                 B    t          j        |t           j                  S rB   )r5   log10r   r   s     r   hlog10zFakeCUDAFp16.hlog10      x,,,,r   c                 B    t          j        |t           j                  S rB   )r5   expr   r   s     r   hexpzFakeCUDAFp16.hexp  r   r   c                 B    t          j        |t           j                  S rB   )r5   exp2r   r   s     r   hexp2zFakeCUDAFp16.hexp2  r   r   c                 0    t          j        d|z            S )N
   r5   r   r   s     r   hexp10zFakeCUDAFp16.hexp10  s    z"'"""r   c                 B    t          j        |t           j                  S rB   )r5   sqrtr   r   s     r   hsqrtzFakeCUDAFp16.hsqrt  r   r   c                 0    t          j        |dz            S )Ng      r   r   s     r   hrsqrtzFakeCUDAFp16.hrsqrt  s    z!t)$$$r   c                 B    t          j        |t           j                  S rB   r5   ceilr   r   s     r   hceilzFakeCUDAFp16.hceil  r   r   c                 B    t          j        |t           j                  S rB   r   r   s     r   hfloorzFakeCUDAFp16.hfloor   r   r   c                 B    t          j        |t           j                  S rB   )r5   
reciprocalr   r   s     r   hrcpzFakeCUDAFp16.hrcp#  s    }Qbj1111r   c                 B    t          j        |t           j                  S rB   )r5   truncr   r   s     r   htrunczFakeCUDAFp16.htrunc&  r   r   c                 B    t          j        |t           j                  S rB   )r5   rintr   r   s     r   hrintzFakeCUDAFp16.hrint)  r   r   c                     ||k    S r   r"   r   s      r   heqzFakeCUDAFp16.heq,      Avr   c                     ||k    S r   r"   r   s      r   hnezFakeCUDAFp16.hne/  r   r   c                     ||k    S r   r"   r   s      r   hgezFakeCUDAFp16.hge2  r   r   c                     ||k    S r   r"   r   s      r   hgtzFakeCUDAFp16.hgt5      1ur   c                     ||k    S r   r"   r   s      r   hlezFakeCUDAFp16.hle8  r   r   c                     ||k     S r   r"   r   s      r   hltzFakeCUDAFp16.hlt;  r   r   c                 "    t          ||          S r   )r{   r   s      r   hmaxzFakeCUDAFp16.hmax>      1ayyr   c                 "    t          ||          S r   )r   r   s      r   hminzFakeCUDAFp16.hminA  r   r   N)!r   r   r    r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r"   r   r   r   r      s                     + + ++ + ++ + +, , ,- - -+ + +, , ,# # #, , ,% % %, , ,, , ,2 2 2- - -, , ,                  r   r   c                   T   e Zd ZdZd Zed             Zed             Zed             Zed             Z	ed             Z
ed             Zed	             Zed
             Zed             Zed             Zd Zd Zd Zd Zd Zd Zd Zd Zd Zd Zd Zd Zd Zd Zd Zd ZdS )FakeCUDAModulea7  
    An instance of this class will be injected into the __globals__ for an
    executing function in order to implement calls to cuda.*. This will fail to
    work correctly if the user code does::

        from numba import cuda as something_else

    In other words, the CUDA module must be called cuda.
    c                    t          | | _        t          | | _        t                      | _        t                      | _        t          |          | _        t                      | _
        t                      | _        t                      | _        t          j                    D ]1\  }}t#          | ||           |j        D ]}t#          | ||           2d S r   )r	   gridDimblockDimr,   _cgr0   _localr@   _sharedr;   _constrY   _atomicr   _fp16r   itemssetattraliases)r   grid_dim	block_dimrH   namesvtyaliass          r   r   zFakeCUDAModule.__init__P  s    Xi(<<#oo%n55#oo%''!^^
 ',.. 	+ 	+JD$D$%%% + +eT****+	+ 	+r   c                     | j         S r   )r   r   s    r   cgzFakeCUDAModule.cgc  s	    xr   c                     | j         S r   )r   r   s    r   localzFakeCUDAModule.localg  
    {r   c                     | j         S r   )r   r   s    r   sharedzFakeCUDAModule.sharedk  
    |r   c                     | j         S r   )r   r   s    r   constzFakeCUDAModule.consto  r  r   c                     | j         S r   )r   r   s    r   atomiczFakeCUDAModule.atomics  r  r   c                     | j         S r   )r   r   s    r   fp16zFakeCUDAModule.fp16w  s
    zr   c                 2    t          j                    j        S r   )r'   r(   	threadIdxr   s    r   r  zFakeCUDAModule.threadIdx{  s    '))33r   c                 2    t          j                    j        S r   )r'   r(   blockIdxr   s    r   r  zFakeCUDAModule.blockIdx  s    '))22r   c                     dS N    r"   r   s    r   warpsizezFakeCUDAModule.warpsize  s    rr   c                 8    t          j                    j        dz  S r  )r'   r(   	thread_idr   s    r   laneidzFakeCUDAModule.laneid  s    '))3b88r   c                 P    t          j                                                     d S r   r&   r   s    r   r)   zFakeCUDAModule.syncthreads  s#     ""..00000r   c                     d S r   r"   r   s    r   threadfencezFakeCUDAModule.threadfence      r   c                     d S r   r"   r   s    r   threadfence_blockz FakeCUDAModule.threadfence_block  r  r   c                     d S r   r"   r   s    r   threadfence_systemz!FakeCUDAModule.threadfence_system  r  r   c                 N    t          j                                        |          S r   )r'   r(   syncthreads_countr   r^   s     r   r"  z FakeCUDAModule.syncthreads_count  s    '));;C@@@r   c                 N    t          j                                        |          S r   )r'   r(   syncthreads_andr#  s     r   r%  zFakeCUDAModule.syncthreads_and  s    '))99#>>>r   c                 N    t          j                                        |          S r   )r'   r(   syncthreads_orr#  s     r   r'  zFakeCUDAModule.syncthreads_or  s    '))88===r   c                 F    t          |                              d          S )N1)binrJ   r#  s     r   popczFakeCUDAModule.popc  s    3xx~~c"""r   c                     ||z  |z   S r   r"   r   s       r   fmazFakeCUDAModule.fma  r   r   c                     |dz  S )NgUUUUUU?r"   r   s     r   cbrtzFakeCUDAModule.cbrt  s    U|r   c                 Z    t          d                    |          d d d         d          S )N{:032b}rL   )intformatr#  s     r   brevzFakeCUDAModule.brev  s+    9##C((2.222r   c                     d                     |          }t          |          t          |                    d                    z
  S )Nr1  0)r4  lenlstrip)r   r^   ss      r   clzzFakeCUDAModule.clz  s8    S!!1vvAHHSMM****r   c                     d                     |          }t          |          t          |                    d                    z
  dz   dz  }|S )Nr1  r7  r   !   )r4  r8  rstrip)r   r^   r:  rs       r   ffszFakeCUDAModule.ffs  sH     S!!VVc!((3--(((1,2r   c                     |r|n|S r   r"   r   s       r   selpzFakeCUDAModule.selp  s    }qq1r   c                    | j         }| j        }| j        }|j        |j        z  |j        z   }|dk    r|S |j        |j        z  |j        z   }|dk    r||fS |j        |j        z  |j        z   }|dk    r|||fS t          d|z            )Nr   rL      z*Global ID has 1-3 dimensions. %d requested)r   r  r  r   r   r   RuntimeError)r   nbdimbidtidr   r   r   s           r   gridzFakeCUDAModule.grid  s    }mnEDFNSU"66HEDFNSU"66q6MEDFNSU"66q!9G!KLLLr   c                     | j         }| j        }|j        |j        z  }|dk    r|S |j        |j        z  }|dk    r||fS |j        |j        z  }|dk    r|||fS t          d|z            )Nr   rL   rD  z,Global grid has 1-3 dimensions. %d requested)r   r   r   r   r   rE  )r   rF  rG  gdimr   r   r   s          r   gridsizezFakeCUDAModule.gridsize  s    }|FTVO66HFTVO66q6MFTVO66q!9IAMNNNr   N) r   r   r    r!   r   propertyr  r  r  r	  r  r  r  r  r  r  r)   r  r  r   r"  r%  r'  r+  r-  r/  r5  r;  r@  rB  rJ  rM  r"   r   r   r   r   E  s6        + + +&   X   X   X   X   X   X 4 4 X4 3 3 X3   X 9 9 X91 1 1      A A A? ? ?> > ># # #    3 3 3+ + +    M M M O O O O Or   r   c              #   n  K   ddl m | j        }t          fd|                                D                       }t          fd|                                D                       }|                    |           	 d V  |                    |           d S # |                    |           w xY w)Nr   )cudac              3   .   K   | ]\  }}|u 	||fV  d S r   r"   ).0kvrP  s      r   	<genexpr>z&swapped_cuda_module.<locals>.<genexpr>  s/      AA41aqDyyAyyyyAAr   c              3   &   K   | ]\  }}|fV  d S r   r"   )rR  rS  rT  fake_cuda_modules      r   rU  z&swapped_cuda_module.<locals>.<genexpr>  s-      ??$!Q$%??????r   )numbarP  __globals__dictr   update)fnrW  fn_globsorigreplrP  s    `   @r   swapped_cuda_moduler`    s      ~HAAAA8>>#3#3AAAAAD????$**,,?????DOOD 	s   B B4)%r!   
contextlibr   rR   r'   rP   
numba.corer   numpyr5   numba.npr   r   objectr	   r$   r,   r0   r;   r@   Lockr[   rd   rg   rj   rm   rz   r   r   r   rq   rt   rw   rY   r   r   r`  r"   r   r   <module>rg     s   
 & % % % % % 



                   " " " " " " & & & & & &    6   *	1 	1 	1 	1 	1 	1 	1 	1       & & & & &F & & &    F   , , , , ,V , , ,^ ).


).


).

			
).


).


).

%y~'' 
).


).


).

9>\ \ \ \ \V \ \ \~Y Y Y Y Y6 Y Y YxXO XO XO XO XOV XO XO XOv     r   