
    J/Phg                         d dl Z d dlmZ d dlmZmZ ddlmZ d Zd Z	d Z
d	 Zd
 Zd Zd Zd Zd Zd Zd Zd Zd Zd Zd Zd Zd Zd Zd Zd Zd Zd Zddddd d!d"d#d$d%d&d'd(d)d*Zd+ Z G d, d-e           Z!d. Z"dS )/    N)ir)cgutilstargetconfig   )nvvmc                 <   dt          |          z   dz   }t          j        t          j        |          t          j        t          j        |                    t          j        |          t          j        |          f          }t          j        | ||          S )N___numba_atomic_i	_cas_hack)strr   FunctionTypeIntTypePointerTyper   get_or_insert_function)lmodisizefnamefntys       T/var/www/html/test/jupyter/venv/lib/python3.11/site-packages/numba/cuda/nvvmutils.pydeclare_atomic_cas_intr      s    #e**,{:E?2:e,,N2:e+<+<==Ju--Ju--/0 0D )$e<<<    c                 `    |                      |||dd          }|                     |d          S )N	monotonicr   )cmpxchgextract_value)builderr   r   ptrcmpvalouts          r   atomic_cmpxchgr       s1    
//#sCk
B
BC  a(((r   c                     d}t          j        t          j                    t          j        t          j                    d          t          j                    f          }t	          j        | ||          S )Nz#llvm.nvvm.atomic.load.add.f32.p0f32r   r   r   	FloatTyper   r   r   r   r   r   s      r   declare_atomic_add_float32r%      sW    1E?2<>>N2<>>1==r|~~NP PD)$e<<<r   c                 P   t          j                                                    }|j        dk    rd}nd}t	          j        t	          j                    t	          j        t	          j                              t	          j                    f          }t          j	        | ||          S )N)   r   z#llvm.nvvm.atomic.load.add.f64.p0f64___numba_atomic_double_add)
r   ConfigStacktopcompute_capabilityr   r   
DoubleTyper   r   r   )r   flagsr   r   s       r   declare_atomic_add_float64r.      s    $&&**,,E6))5,?2=??N2=??;;R]__MO OD)$e<<<r   c                     d}t          j        t          j                    t          j        t          j                              t          j                    f          }t	          j        | ||          S )N___numba_atomic_float_subr"   r$   s      r   declare_atomic_sub_float32r1   '   U    'E?2<>>N2<>>::BLNNKM MD)$e<<<r   c                     d}t          j        t          j                    t          j        t          j                              t          j                    f          }t	          j        | ||          S )N___numba_atomic_double_subr   r   r,   r   r   r   r$   s      r   declare_atomic_sub_float64r6   .   U    (E?2=??N2=??;;R]__MO OD)$e<<<r   c                     d}t          j        t          j        d          t          j        t          j        d                    t          j        d          f          }t	          j        | ||          S )Nz"llvm.nvvm.atomic.load.inc.32.p0i32    r   r   r   r   r   r   r$   s      r   declare_atomic_inc_int32r;   5   [    0E?2:b>>N2:b>>::BJrNNKM MD)$e<<<r   c                     d}t          j        t          j        d          t          j        t          j        d                    t          j        d          f          }t	          j        | ||          S )N___numba_atomic_u64_inc@   r:   r$   s      r   declare_atomic_inc_int64r@   <   [    %E?2:b>>N2:b>>::BJrNNKM MD)$e<<<r   c                     d}t          j        t          j        d          t          j        t          j        d                    t          j        d          f          }t	          j        | ||          S )Nz"llvm.nvvm.atomic.load.dec.32.p0i32r9   r:   r$   s      r   declare_atomic_dec_int32rC   C   r<   r   c                     d}t          j        t          j        d          t          j        t          j        d                    t          j        d          f          }t	          j        | ||          S )N___numba_atomic_u64_decr?   r:   r$   s      r   declare_atomic_dec_int64rF   J   rA   r   c                     d}t          j        t          j                    t          j        t          j                              t          j                    f          }t	          j        | ||          S )N___numba_atomic_float_maxr"   r$   s      r   declare_atomic_max_float32rI   Q   r2   r   c                     d}t          j        t          j                    t          j        t          j                              t          j                    f          }t	          j        | ||          S )N___numba_atomic_double_maxr5   r$   s      r   declare_atomic_max_float64rL   X   r7   r   c                     d}t          j        t          j                    t          j        t          j                              t          j                    f          }t	          j        | ||          S )N___numba_atomic_float_minr"   r$   s      r   declare_atomic_min_float32rO   _   r2   r   c                     d}t          j        t          j                    t          j        t          j                              t          j                    f          }t	          j        | ||          S )N___numba_atomic_double_minr5   r$   s      r   declare_atomic_min_float64rR   f   r7   r   c                     d}t          j        t          j                    t          j        t          j                              t          j                    f          }t	          j        | ||          S )N___numba_atomic_float_nanmaxr"   r$   s      r   declare_atomic_nanmax_float32rU   m   U    *E?2<>>N2<>>::BLNNKM MD)$e<<<r   c                     d}t          j        t          j                    t          j        t          j                              t          j                    f          }t	          j        | ||          S )N___numba_atomic_double_nanmaxr5   r$   s      r   declare_atomic_nanmax_float64rY   t   U    +E?2=??N2=??;;R]__MO OD)$e<<<r   c                     d}t          j        t          j                    t          j        t          j                              t          j                    f          }t	          j        | ||          S )N___numba_atomic_float_nanminr"   r$   s      r   declare_atomic_nanmin_float32r]   {   rV   r   c                     d}t          j        t          j                    t          j        t          j                              t          j                    f          }t	          j        | ||          S )N___numba_atomic_double_nanminr5   r$   s      r   declare_atomic_nanmin_float64r`      rZ   r   c                     d}t          j        t          j        d          t          j        d          f          }t          j        | ||          S )NcudaCGGetIntrinsicHandler?   r9   r   r   r   r   r   r$   s      r    declare_cudaCGGetIntrinsicHandlerd      sE    &E?2:b>>JrNN,. .D)$e<<<r   c                     d}t          j        t          j        d          t          j        d          t          j        d          f          }t          j        | ||          S )NcudaCGSynchronizer9   r?   rc   r$   s      r   declare_cudaCGSynchronizerg      sO    E?2:b>>JrNNBJrNN;= =D)$e<<<r   c                 f   | j         j        j        }t          j        |                    d          dz             }t          j        ||j        dt          j	                  }d|_
        d|_        ||_        |                     |t          j        t          j        d                    d          S )	Nzutf-8    _str)name	addrspaceinternalT   generic)basic_blockfunctionmoduler   make_bytearrayencodeadd_global_variabletyper   ADDRSPACE_CONSTANTlinkageglobal_constantinitializeraddrspacecastr   r   r   )r   valuer   cvalgls        r   declare_stringr      s    '.D!%,,w"7"7'"ABBD		$T496/3/F
H 
H 
HBBJBBN  R^BJqMM%B%BINNNr   c                     t          j        t          j        d                    }t          j        t          j        d          ||g          }t	          j        | |d          }|S )Nrn   r9   vprintf)r   r   r   r   r   r   )r   	voidptrty	vprintftyr   s       r   declare_vprintr      sR    rz!}}--I 
2I0FGGI,T9iHHGNr   zllvm.nvvm.read.ptx.sreg.tid.xzllvm.nvvm.read.ptx.sreg.tid.yzllvm.nvvm.read.ptx.sreg.tid.zzllvm.nvvm.read.ptx.sreg.ntid.xzllvm.nvvm.read.ptx.sreg.ntid.yzllvm.nvvm.read.ptx.sreg.ntid.zzllvm.nvvm.read.ptx.sreg.ctaid.xzllvm.nvvm.read.ptx.sreg.ctaid.yzllvm.nvvm.read.ptx.sreg.ctaid.zz llvm.nvvm.read.ptx.sreg.nctaid.xz llvm.nvvm.read.ptx.sreg.nctaid.yz llvm.nvvm.read.ptx.sreg.nctaid.zz llvm.nvvm.read.ptx.sreg.warpsizezllvm.nvvm.read.ptx.sreg.laneid)ztid.xztid.yztid.zzntid.xzntid.yzntid.zzctaid.xzctaid.yzctaid.zznctaid.xznctaid.yznctaid.zwarpsizelaneidc                     | j         }t          j        t          j        d          d          }t	          j        ||t          |                   }|                     |d          S )Nr9    )rr   r   r   r   r   r   SREG_MAPPINGcall)r   rk   rr   r   fns        r   	call_sregr      sO    ^F?2:b>>2..D		'l46H	I	IB<<Br   c                   2    e Zd Zd Zd Zd Zd Zd Zd ZdS )SRegBuilderc                     || _         d S N)r   )selfr   s     r   __init__zSRegBuilder.__init__   s    r   c                 2    t          | j        d|z            S )Nztid.%sr   r   r   xyzs     r   tidzSRegBuilder.tid   s    x#~666r   c                 2    t          | j        d|z            S )Nzctaid.%sr   r   s     r   ctaidzSRegBuilder.ctaid   s    zC'7888r   c                 2    t          | j        d|z            S )Nzntid.%sr   r   s     r   ntidzSRegBuilder.ntid   s    y3777r   c                 2    t          | j        d|z            S )Nz	nctaid.%sr   r   s     r   nctaidzSRegBuilder.nctaid   s    {S'8999r   c                    t          j        d          }| j                            |                     |          |          }| j                            |                     |          |          }| j                            |                     |          |          }| j                            | j                            ||          |          }|S )Nr?   )	r   r   r   sextr   r   r   addmul)r   r   i64r   r   r   ress          r   getdimzSRegBuilder.getdim   s    jnnls33|  355""4::c??C88lt|//f==sCC
r   N)	__name__
__module____qualname__r   r   r   r   r   r   r   r   r   r   r      sn          7 7 79 9 98 8 8: : :    r   r   c                     t          |           fddD             }t          t          j        |d |                    }|dk    r|d         S |S )Nc              3   B   K   | ]}                     |          V  d S r   )r   ).0r   sregs     r   	<genexpr>z get_global_id.<locals>.<genexpr>   s/      	,	,s$++c

	,	,	,	,	,	,r   r   r   r   )r   list	itertoolsislice)r   dimitseqr   s       @r   get_global_idr      s\    wD	,	,	,	,e	,	,	,B
yD#..
/
/C
axx1v
r   )#r   llvmliter   
numba.corer   r   cudadrvr   r   r    r%   r.   r1   r6   r;   r@   rC   rF   rI   rL   rO   rR   rU   rY   r]   r`   rd   rg   r   r   r   r   objectr   r   r   r   r   <module>r      s$             , , , , , , , ,      = = =) ) )
= = == = == = == = == = == = == = == = == = == = == = == = == = == = == = == = == = == = =	O 	O 	O   -,,...0002222.% ,         &   2    r   