
    J/Ph_                     ^   d dl mZ d dlmZmZ d dlmZ d dlmZm	Z	 d dl
mZ d dlmZ d dlmZ d dlmZ d	 Zed
             Zed             Zed             Z e ej        e          dd          d             Zed             Zd Zed             Zed             Zed             ZdS )    )ir)cudatypes)cgutils)RequireLiteralValueNumbaValueError)	signature)overload_attribute)	nvvmutils)	intrinsicc                     | j         }|dk    rt          j        }n3|dv r t          j        t          j        |          }nt	          d          t          |t          j                  S )N   )      zargument can only be 1, 2, 3)literal_valuer   int64UniTupler   r	   int32)ndimvalrestypes      U/var/www/html/test/jupyter/venv/lib/python3.11/site-packages/numba/cuda/intrinsics.py_type_grid_functionr      s[    

C
axx+	.c22<===Wek***    c                     t          |t          j                  st          |          t	          |          }d }||fS )a  grid(ndim)

    Return the absolute position of the current thread in the entire grid of
    blocks.  *ndim* should correspond to the number of dimensions declared when
    instantiating the kernel. If *ndim* is 1, a single integer is returned.
    If *ndim* is 2 or 3, a tuple of the given number of integers is returned.

    Computation of the first integer is as follows::

        cuda.threadIdx.x + cuda.blockIdx.x * cuda.blockDim.x

    and is similar for the other two indices, but using the ``y`` and ``z``
    attributes.
    c                     |j         }|t          j        k    rt          j        |d          S t          |t          j                  r0t          j        ||j                  }t          j	        ||          S d S )Nr   )dim)
return_typer   r   r   get_global_id
isinstancer   countr   
pack_array)contextbuildersigargsr   idss         r   codegenzgrid.<locals>.codegen1   st    /ek!!*7::::00 	4)'w}EEEC%gs333	4 	4r   r    r   IntegerLiteralr   r   )	typingctxr   r%   r(   s       r   gridr,      sO    " dE011 (!$'''
d
#
#C4 4 4 <r   c                     t          |t          j                  st          |          t	          |          }d fd}||fS )a  gridsize(ndim)

    Return the absolute size (or shape) in threads of the entire grid of
    blocks. *ndim* should correspond to the number of dimensions declared when
    instantiating the kernel. If *ndim* is 1, a single integer is returned.
    If *ndim* is 2 or 3, a tuple of the given number of integers is returned.

    Computation of the first integer is as follows::

        cuda.blockDim.x * cuda.gridDim.x

    and is similar for the other two indices, but using the ``y`` and ``z``
    attributes.
    c                    t          j        d          }t          j        | d|           }t          j        | d|           }|                     |                     ||          |                     ||                    S )N@   zntid.znctaid.)r   IntTyper   	call_sregmulsext)r$   r   i64ntidnctaids        r   _nthreads_for_dimz#gridsize.<locals>._nthreads_for_dimR   so    jnn"7MCMM::$Wooo>>{{7<<c22GLL4M4MNNNr   c                 D   |j         } |d          }|t          j        k    r|S t          |t          j                  r] |d          }|j        dk    rt          j        |||f          S |j        dk    r& |d          }t          j        ||||f          S d S d S )Nxyr   r   z)r   r   r   r    r   r!   r   r"   )	r#   r$   r%   r&   r   nxnynzr7   s	           r   r(   zgridsize.<locals>.codegenX   s    /w,,ek!!I00 	A""7C00B}!!)'B8<<<!##&&w44)'BB<@@@	A 	A
 $#r   r)   )r+   r   r%   r(   r7   s       @r   gridsizer?   <   sq    " dE011 (!$'''
d
#
#CO O OA A A A A <r   c                 B    t          t          j                  }d }||fS )Nc                 ,    t          j        |d          S )Nwarpsize)r   r1   )r#   r$   r%   r&   s       r   r(   z_warpsize.<locals>.codegenn   s    "7J777r   )r	   r   r   r+   r%   r(   s      r   	_warpsizerD   j   s,    
EK
 
 C8 8 8 <r   rB   r   )targetc                     d }|S )z_
    The size of a warp. All architectures implemented to date have a warp size
    of 32.
    c                     t                      S )N)rD   )mods    r   getzcuda_warpsize.<locals>.getz   s    {{r    )rH   rI   s     r   cuda_warpsizerK   t   s      Jr   c                 B    t          t          j                  }d }||fS )a  
    Synchronize all threads in the same thread block.  This function implements
    the same pattern as barriers in traditional multi-threaded programming: this
    function waits until all threads in the block call it, at which point it
    returns control to all its callers.
    c                     d}|j         }t          j        t          j                    d          }t	          j        |||          }|                    |d           |                                 S )Nzllvm.nvvm.barrier0rJ   )moduler   FunctionTypeVoidTyper   get_or_insert_functioncallget_dummy_value)r#   r$   r%   r&   fnamelmodfntysyncs           r   r(   zsyncthreads.<locals>.codegen   s_    $~r{}}b11-dD%@@T2&&(((r   )r	   r   nonerC   s      r   syncthreadsrY      s.     EJ

C) ) ) <r   c                     t          |t          j                  sd S t          t          j        t          j                  }fd}||fS )Nc                     t          j        t          j        d          t          j        d          f          }t          j        |j        |          }|                    ||          S )N    )r   rO   r0   r   rQ   rN   rR   )r#   r$   r%   r&   rV   rW   rT   s         r   r(   z'_syncthreads_predicate.<locals>.codegen   sQ    rz"~~
2/@AA-gndEJJ||D$'''r   )r    r   Integerr	   i4)r+   	predicaterT   r%   r(   s     `  r   _syncthreads_predicater`      sT    i// t
EHeh
'
'C( ( ( ( (
 <r   c                 (    d}t          | ||          S )z
    syncthreads_count(predicate)

    An extension to numba.cuda.syncthreads where the return value is a count
    of the threads where predicate is true.
    zllvm.nvvm.barrier0.popcr`   r+   r_   rT   s      r   syncthreads_countrd      s     &E!)Y>>>r   c                 (    d}t          | ||          S )z
    syncthreads_and(predicate)

    An extension to numba.cuda.syncthreads where 1 is returned if predicate is
    true for all threads or 0 otherwise.
    zllvm.nvvm.barrier0.andrb   rc   s      r   syncthreads_andrf      s     %E!)Y>>>r   c                 (    d}t          | ||          S )z
    syncthreads_or(predicate)

    An extension to numba.cuda.syncthreads where 1 is returned if predicate is
    true for any thread or 0 otherwise.
    zllvm.nvvm.barrier0.orrb   rc   s      r   syncthreads_orrh      s     $E!)Y>>>r   N)llvmliter   numbar   r   
numba.corer   numba.core.errorsr   r   numba.core.typingr	   numba.core.extendingr
   
numba.cudar   numba.cuda.extendingr   r   r,   r?   rD   ModulerK   rY   r`   rd   rf   rh   rJ   r   r   <module>rr      s                       B B B B B B B B ' ' ' ' ' ' 3 3 3 3 3 3             * * * * * *	+ 	+ 	+   @ * * *Z    LEL&&
6BBB  CB   (   ? ? ? ? ? ? ? ? ? ? ?r   