
    J/Ph                     (   d dl mZ d dlZd dlZd dlmZ d dlmZ d dl	m
Z
mZ d dlmZ d dlmZ d dlmZmZ d dlmZ d d	lmZ d
dlmZ d dlmZ d dlmZmZmZ d dlm Z m!Z!  e
            Z"e"j#        Z#e"j$        Z%e"j&        Z&d Z' e% ej(        e          d          d             Z) e% ej(        e          d          d             Z* e% ej(        e          d          d             Z+ e% ej(        e          d          d             Z, e% ej(        e          d          d             Z- e%e d          d             Z. e%e d          d             Z/ e%e d          d             Z0 e#ej1        j2        ej3                  d              Z4d a5d! Z6 e#ej7        j8        ej9        ej:                  d"             Z; e#ej7        j8        ej<        ej:                   e#ej7        j8        ej=        ej:                  d#                         Z> e#ej?        j8        ej9        ej:                  d$             Z@ e#ej?        j8        ej<        ej:                   e#ej?        j8        ej=        ej:                  d%                         ZA e#ejB                  d&             ZC e#ejD                  d'             ZE e#ejF                  d(             ZG e#ejH                  d)             ZI e#ejH        ejJ                  d*             ZK e#ejL        ejJ        ejJ        ejJ        ejJ        ejJ                   e#ejL        ejJ        ejJ        ejM        ejJ        ejJ                   e#ejL        ejJ        ejJ        ejN        ejJ        ejJ                   e#ejL        ejJ        ejJ        ejO        ejJ        ejJ                  d+                                                 ZP e#ejQ        ejJ        ejJ        ejR                  d,             ZS e#ejT        ejJ        ejJ                   e#ejT        ejJ        ejM                   e#ejT        ejJ        ejN                   e#ejT        ejJ        ejO                  d-                                                 ZU e#ejV        ejJ        ejJ                   e#ejV        ejJ        ejM                   e#ejV        ejJ        ejN                   e#ejV        ejJ        ejO                  d.                                                 ZW e#ejX                  d/             ZY e#ejZ                  d0             Z[ e#ej\        ej:                  d1             Z] e#ej^        ej:        ej:        ej:                  d2             Z_d3 Z` eeja        ejb                  d4             Zc eejb        eja                  d5             Zdd6 Ze eeja        ejf                  d7             Zg eejf        eja                   eej9        eja                  d8                         Zhd9 Zi eiejj        jk        d:            eiejl        d:            eiejm        d:            eiejj        jn        d;            eiejo        d;            eiejp        d;            eiejj        jq        d<            eiejr        d<            eiejs        d<            e#ejj        jt        eja                  d=             Zu e#ejv        eja                  d>             Zw e#ejj        jx        eja                  d?             Zy e#ezeja                  d@             Z{ e#ejj        j|        eja        eja        eja                  dA             Z} e#ej~        eja        eja                   e#ej        eja        eja                  dB                         ZdCZdD Z  e#ejj        j        eja        eja                   edE                       e#ej        eja        eja                   edE                       e#ejj        j        eja        eja                   edF                       e#ej        eja        eja                   edF                       e#ejj        j        eja        eja                   edG                       e#ej        eja        eja                   edG                       e#ejj        j        eja        eja                   edH                       e#ej        eja        eja                   edH                       e#ejj        j        eja        eja                   edI                       e#ej        eja        eja                   edI                       e#ejj        j        eja        eja                   edJ                       e#ej        eja        eja                   edJ                     dK Z eejj        j        dLdH            eejj        j        dMdJ           ej        dNej        dOiZ e#ej        ej                   e#ej        ej                  dP                         Z e#ej        ej                  dQ             Z e#ej        ej                  dR             Z e#ej        ej:                  dS             Z e#ej        ejJ                   e#ej        ej                  dT                         Z e#ej        ejM                   e#ej        ej                  dU                         Z e#ej        ej:        ej:        ej:                  dV             Z e#eejN        ejN                  dW             Z e#eejO        ejN                   e#eejN        ejO                   e#eejO        ejO                  dX                                     Z e#eejN        ejN                  dY             Z e#eejO        ejN                   e#eejN        ejO                   e#eejO        ejO                  dZ                                     Z e#eejN                   e#eejO                  d[                         Z e#eejN        ejf                   e#eejO        ejf                  d\                         Zd] Zej        d^z  Zd^ej        z  Z  e#ej        ejN                   ee                       e#ej        ejO                   ee                       e#ej        ejN                   ee                       e#ej        ejO                   ee                     d_ Zd` Z e#ej        jl        ej3        ej        ej:                   e#ej        jl        ej3        ej=        ej:                   e#ej        jl        ej3        ej<        ej:                  eda                                                 Z e#ej        jo        ej3        ej        ej:                   e#ej        jo        ej3        ej=        ej:                   e#ej        jo        ej3        ej<        ej:                  edb                                                 Z e#ej        j        ej3        ej        ej:                   e#ej        j        ej3        ej=        ej:                   e#ej        j        ej3        ej<        ej:                  edc                                                 Z e#ej        j        ej3        ej        ej:                   e#ej        j        ej3        ej=        ej:                   e#ej        j        ej3        ej<        ej:                  edd                                                 Zde Z eej        j        df            eej        j        dg            eej        j        dh            e#ej        j        ej3        ej        ej:                   e#ej        j        ej3        ej=        ej:                   e#ej        j        ej3        ej<        ej:                  edi                                                 Z e#ej        j        ej3        ej        ej:                   e#ej        j        ej3        ej<        ej:                   e#ej        j        ej3        ej=        ej:                  edj                                                 Z e#ej        j        ej3        ej        ej:                   e#ej        j        ej3        ej<        ej:                   e#ej        j        ej3        ej=        ej:                  edk                                                 Z e#ej        j        ej3        ej        ej:                   e#ej        j        ej3        ej<        ej:                   e#ej        j        ej3        ej=        ej:                  edl                                                 Z e#ej        j        ej3        ej        ej:                   e#ej        j        ej3        ej<        ej:                   e#ej        j        ej3        ej=        ej:                  edm                                                 Z e#ej        j        ej3        ej:        ej:                  dn             Z e#ej        j        ej3        ej        ej:        ej:                   e#ej        j        ej3        ej<        ej:        ej:                   e#ej        j        ej3        ej=        ej:        ej:                  do                                     Z e#ej        ej                  dp             Z	 dtdrZ e&e!          ds             Z e ej                    e#           dS )u    )reduceN)ir)Registry
lower_cast)parse_dtype)models)typescgutils)ufunc_db)register_ufuncs   )nvvm)cuda)	nvvmutilsstubserrors)dim3CUDADispatcherc                     t          j        | d|z            }t          j        | d|z            }t          j        | d|z            }t          j        | |||f          S )Nz%s.xz%s.yz%s.z)r   	call_sregr
   pack_struct)builderprefixxyzs        S/var/www/html/test/jupyter/venv/lib/python3.11/site-packages/numba/cuda/cudaimpl.pyinitialize_dim3r      s]    GVf_55AGVf_55AGVf_55AwAq	222    	threadIdxc                 "    t          |d          S )Ntidr   contextr   sigargss       r   cuda_threadIdxr(       s    7E***r   blockDimc                 "    t          |d          S )Nntidr#   r$   s       r   cuda_blockDimr,   %   s    7F+++r   blockIdxc                 "    t          |d          S )Nctaidr#   r$   s       r   cuda_blockIdxr0   *   s    7G,,,r   gridDimc                 "    t          |d          S )Nnctaidr#   r$   s       r   cuda_gridDimr4   /   s    7H---r   laneidc                 ,    t          j        |d          S )Nr5   )r   r   r$   s       r   cuda_laneidr7   4   s    w111r   r   c                 .    |                     |d          S Nr   extract_valuer$   s       r   dim3_xr<   9         q)))r   r   c                 .    |                     |d          S )Nr   r:   r$   s       r   dim3_yr?   >   r=   r   r   c                 .    |                     |d          S )N   r:   r$   s       r   dim3_zrB   C   r=   r   c                     |d         S r9    r$   s       r   cuda_const_array_likerE   J   s     7Nr   c                 L    t           dz  a d                    | t                     S )zDue to bug with NVVM invalid internalizing of shared memory in the
    PTX output.  We can't mark shared memory to be internal. We have to
    ensure unique name is generated for shared memory symbol.
    r   z{0}_{1})_unique_smem_idformatnames    r   _get_unique_smem_idrK   T   s$     qOD/222r   c           	          |j         d         j        }t          |j         d                   }t          | ||f|t	          d          t
          j        d          S )Nr   r   _cudapy_smemTshapedtypesymbol_name	addrspacecan_dynsized)r'   literal_valuer   _generic_arrayrK   r   ADDRSPACE_SHAREDr%   r   r&   r'   lengthrP   s         r   cuda_shared_array_integerrY   ^   sX    Xa[&F$$E'76)5&9.&I&I$($9'+- - - -r   c           	          d |j         d         D             }t          |j         d                   }t          | |||t          d          t          j        d          S )Nc                     g | ]	}|j         
S rD   rT   .0ss     r   
<listcomp>z+cuda_shared_array_tuple.<locals>.<listcomp>k       444!ao444r   r   r   rM   TrN   )r'   r   rU   rK   r   rV   r%   r   r&   r'   rO   rP   s         r   cuda_shared_array_tuplerc   h   sc     54sx{444E$$E'7%u&9.&I&I$($9'+- - - -r   c           	          |j         d         j        }t          |j         d                   }t          | ||f|dt          j        d          S )Nr   r   _cudapy_lmemFrN   )r'   rT   r   rU   r   ADDRSPACE_LOCALrW   s         r   cuda_local_array_integerrg   s   sP    Xa[&F$$E'76)5&4$($8',. . . .r   c           	          d |j         d         D             }t          |j         d                   }t          | |||dt          j        d          S )Nc                     g | ]	}|j         
S rD   r\   r]   s     r   r`   z(ptx_lmem_alloc_array.<locals>.<listcomp>   ra   r   r   r   re   FrN   )r'   r   rU   r   rf   rb   s         r   ptx_lmem_alloc_arrayrj   }   s[     54sx{444E$$E'7%u&4$($8',. . . .r   c                     |rJ d}|j         }t          j        t          j                    d          }t	          j        |||          }|                    |d           |                                 S )Nzllvm.nvvm.membar.ctarD   moduler   FunctionTypeVoidTyper
   get_or_insert_functioncallget_dummy_valuer%   r   r&   r'   fnamelmodfntysyncs           r   ptx_threadfence_blockrx      h    OOO"E>D?2;=="--D)$e<<DLLr""$$$r   c                     |rJ d}|j         }t          j        t          j                    d          }t	          j        |||          }|                    |d           |                                 S )Nzllvm.nvvm.membar.sysrD   rl   rs   s           r   ptx_threadfence_systemr{      ry   r   c                     |rJ d}|j         }t          j        t          j                    d          }t	          j        |||          }|                    |d           |                                 S )Nzllvm.nvvm.membar.glrD   rl   rs   s           r   ptx_threadfence_devicer}      sh    OOO!E>D?2;=="--D)$e<<DLLr""$$$r   c                     |                      t          j        d          }t          j        t          j                  }t	          | |||g          S )Nl    )get_constantr	   int32noneptx_syncwarp_mask)r%   r   r&   r'   maskmask_sigs         r   ptx_syncwarpr      s@    Z88Dz%+&&HWgx$@@@r   c                    d}|j         }t          j        t          j                    t          j        d          f          }t          j        |||          }|                    ||           |                                 S )Nzllvm.nvvm.bar.warp.sync    )	rm   r   rn   ro   IntTyper
   rp   rq   rr   rs   s           r   r   r      sj    %E>D?2;==2:b>>*;<<D)$e<<DLLt""$$$r   c           
      H   |\  }}}}}|j         d         }	|	t          j        v r-|                    |t	          j        |	j                            }d}
|j        }t	          j        t	          j	        t	          j        d          t	          j        d          f          t	          j        d          t	          j        d          t	          j        d          t	          j        d          t	          j        d          f          }t          j        |||
          }|	j        dk    r|                    ||||||f          }|	t          j        k    rj|                    |d          }|                    |d          }|                    |t	          j                              }t          j        |||f          }n|                    |t	          j        d                    }|                    ||                     t          j        d                    }|                    |t	          j        d                    }|                    ||||||f          }|                    ||||||f          }|                    |d          }|                    |d          }|                    |d          }|                    |t	          j        d                    }|                    |t	          j        d                    }|                    ||                     t          j        d                    }|                    ||          }|	t          j        k    r'|                    |t	          j                              }t          j        |||f          }|S )a  
    The NVVM intrinsic for shfl only supports i32, but the cuda intrinsic
    function supports both 32 and 64 bit ints and floats, so for feature parity,
    i64, f32, and f64 are implemented. Floats by way of bitcasting the float to
    an int, then shuffling, then bitcasting back. And 64-bit values by packing
    them into 2 32bit values, shuffling thoose, and then packing back together.
    rA   zllvm.nvvm.shfl.sync.i32r   r   r   @   )r'   r	   real_domainbitcastr   r   bitwidthrm   rn   LiteralStructTyper
   rp   rq   float32r;   	FloatTypemake_anonymous_structtrunclshrr   i8zextshlor_float64
DoubleType)r%   r   r&   r'   r   modevalueindexclamp
value_typert   ru   rv   funcretrvpredfvvalue1
value_lshrvalue2ret1ret2rv1rv2rv1_64rv2_64rv_shls                               r   ptx_shfl_sync_i32r      s     '+#D$ue!JU&&&rz*2E'F'FGG%E>D?
bjnnbjmm<==Z^^RZ^^RZ^^Z^^RZ^^= D
 )$e<<Db  ll4$eUE!BCC&&&&sA..B((a00DR\^^44B/"dDDCubjnn55\\%)=)=eh)K)KLL
z2:b>>::||D4vue"DEE||D4vue"DEE##D!,,##D!,,$$T1--c2:b>>22c2:b>>22VW%9%9%(B%G%GHH[[((&&R]__55B+Gb$Z@@Jr   c                 x   d}|j         }t          j        t          j        t          j        d          t          j        d          f          t          j        d          t          j        d          t          j        d          f          }t          j        |||          }|                    ||          S )Nzllvm.nvvm.vote.syncr   r   )rm   r   rn   r   r   r
   rp   rq   )r%   r   r&   r'   rt   ru   rv   r   s           r   ptx_vote_syncr      s    !E>D?2/B13A1@ A AJrNNBJrNNBJqMMJL LD )$e<<D<<d###r   c                    |\  }}|j         d         j        }|j         d         t          j        v r(|                    |t          j        |                    }d                    |          }|j        }t          j	        t          j        d          t          j        d          t          j        |          f          }	t          j        ||	|          }
|                    |
||f          S )Nr   zllvm.nvvm.match.any.sync.i{}r   )r'   r   r	   r   r   r   r   rH   rm   rn   r
   rp   rq   r%   r   r&   r'   r   r   widthrt   ru   rv   r   s              r   ptx_match_any_syncr      s    
 KD%HQK E
x{e'''rz%'8'899*11%88E>D?2:b>>BJrNNBJu<M<M+NOOD)$e<<D<<tUm,,,r   c                 ,   |\  }}|j         d         j        }|j         d         t          j        v r(|                    |t          j        |                    }d                    |          }|j        }t          j	        t          j
        t          j        d          t          j        d          f          t          j        d          t          j        |          f          }	t          j        ||	|          }
|                    |
||f          S )Nr   zllvm.nvvm.match.all.sync.i{}r   )r'   r   r	   r   r   r   r   rH   rm   rn   r   r
   rp   rq   r   s              r   ptx_match_all_syncr     s    
 KD%HQK E
x{e'''rz%'8'899*11%88E>D?2/B13A1@ A AJrNNBJu,=,=>@ @D )$e<<D<<tUm,,,r   c                     t          j        t          j        t          j        d          g           ddd          }|                    |g           S )Nr   zactivemask.b32 $0;=rTside_effectr   	InlineAsmrn   r   rq   r%   r   r&   r'   
activemasks        r   ptx_activemaskr     sL    bobjnnbAA2DdL L LJ<<
B'''r   c                     t          j        t          j        t          j        d          g           ddd          }|                    |g           S )Nr   zmov.u32 $0, %lanemask_lt;r   Tr   r   r   s        r   ptx_lanemask_ltr   $  sL    bobjnnbAA94*.0 0 0J <<
B'''r   c                 8    |                     |d                   S r9   )ctpopr$   s       r   ptx_popcr   ,  s    ==a!!!r   c                      |j         | S N)fmar$   s       r   ptx_fmar   1  s    7;r   c                 n    ddd}	 ||          S # t           $ r d|  d}t          j        |          w xY w)N)f32f)f64d)r   r   z$Conversion between float16 and float unsupportedKeyErrorr   CudaLoweringErrorr   typemapmsgs      r   float16_float_ty_constraintr   6  s[    \22G,x   , , ,KXKKK&s+++,s    %4c                 4   |j         |j         k    r|S t          |j                   \  }}t          j        |                     |          t          j        d          g          }t          j        |d| dd| d          }|                    ||g          S )N   zcvt..f16 $0, $1;=,h)r   r   r   rn   get_value_typer   r   rq   	r%   r   fromtytotyvalty
constraintrv   asms	            r   float16_to_float_castr   @  s    $-''
0??NB
?711$77"*R..9IJJD
,t4B4446H*6H6H6H
I
IC<<cU###r   c                 2   |j         |j         k    r|S t          |j                   \  }}t          j        t          j        d          |                     |          g          }t          j        |d| dd|           }|                    ||g          S )Nr   cvt.rn.f16. $0, $1;=h,)r   r   r   rn   r   r   r   rq   r   s	            r   float_to_float16_castr   L  s    $-''
0AANB
?2:b>>G,B,B6,J,J+KLLD
,t727779Kz9K9K
L
LC<<cU###r   c                 r    ddddd}	 ||          S # t           $ r d|  d}t          j        |          w xY w)Nchrl)   r   r   r   z"Conversion between float16 and intr   r   r   s      r   float16_int_constraintr   X  s_    CSc33G,x   , , ,I8III&s+++,s    %6c                 (   |j         }t          |          }|j        rdnd}t          j        |                     |          t          j        d          g          }t          j        |d| | dd| d          }	|                    |	|g          S )Nr_   ur   zcvt.rni.r   r   r   )	r   r   signedr   rn   r   r   r   rq   
r%   r   r   r   r   r   r   
signednessrv   r   s
             r   float16_to_integer_castr   b  s    }H'11J,J?711$77"*R..9IJJD
,tD*DhDDD):)))+ +C <<cU###r   c                 &   |j         }t          |          }|j        rdnd}t          j        t          j        d          |                     |          g          }t          j        |d| | dd|           }	|                    |	|g          S )Nr_   r   r   r   r   r   )	r   r   r   r   rn   r   r   r   rq   r   s
             r   integer_to_float16_castr   o  s     H'11J.3J?2:b>>#226::;= =D
,tCZCCCC)Z))+ +C <<cU###r   c                 h    t          | t          j        t          j                  fd            }d S )Nc                     t          j        t          j        d          t          j        d          t          j        d          g          }t          j        | dd          }|                    ||          S )Nr   z.f16 $0,$1,$2;=h,h,hr   rn   r   r   rq   )r%   r   r&   r'   rv   r   ops         r   ptx_fp16_binaryz*lower_fp16_binary.<locals>.ptx_fp16_binary  sh    rz"~~ "
2
2?A Al4B!6!6!6AA||C&&&r   lowerr	   float16)fnr   r   s    ` r   lower_fp16_binaryr  ~  sA    
2u}em,,' ' ' ' -,' ' 'r   addsubmulc                     t          j        t          j        d          t          j        d          g          }t          j        |dd          }|                    ||          S )Nr   zneg.f16 $0, $1;=h,hr   r%   r   r&   r'   rv   r   s         r   ptx_fp16_hnegr	    N    ?2:b>>BJrNN+;<<D
,t.
7
7C<<T"""r   c                 &    t          | |||          S r   )r	  r$   s       r   operator_hnegr        '3555r   c                     t          j        t          j        d          t          j        d          g          }t          j        |dd          }|                    ||          S )Nr   zabs.f16 $0, $1;r  r   r  s         r   ptx_fp16_habsr    r
  r   c                 &    t          | |||          S r   )r  r$   s       r   operator_habsr    r  r   c                    t          j        d          t          j        d          t          j        d          g}t          j        t          j        d          |          }t          j        |dd          }|                    ||          S )Nr   zfma.rn.f16 $0,$1,$2,$3;z=h,h,h,h)r   r   rn   r   rq   )r%   r   r&   r'   argtysrv   r   s          r   ptx_hfmar    sg    jnnbjnnbjnn=F?2:b>>622D
,t6

C
CC<<T"""r   c                 8    d }|                      ||||          S )Nc                 B    t           j                            | |          S r   )r   fp16hdiv)r   r   s     r   fp16_divzfp16_div_impl.<locals>.fp16_div  s    y~~a###r   compile_internal)r%   r   r&   r'   r  s        r   fp16_div_implr    s-    $ $ $ ##GXsDAAAr   z{{
          .reg .pred __$$f16_cmp_tmp;
          setp.{op}.f16 __$$f16_cmp_tmp, $1, $2;
          selp.u16 $0, 1, 0, __$$f16_cmp_tmp;
        }}c                       fd}|S )Nc                    t          j        t          j        d          t          j        d          t          j        d          g          }t          j        |t                              	          d          }|                    ||          }|                     t          j	        d          }|
                    |t          j        d                    }|                    d||          S )Nr   )r   r   r   z!=)r   rn   r   r   	_fp16_cmprH   rq   r   r	   int16r   icmp_unsigned)
r%   r   r&   r'   rv   r   resultzero
int_resultr   s
            r   ptx_fp16_comparisonz*_gen_fp16_cmp.<locals>.ptx_fp16_comparison  s    rz"~~
2
2/OPPl4!1!1R!1!8!8(CCc4((##EK33__VRZ^^<<
$$T:t<<<r   rD   )r   r%  s   ` r   _gen_fp16_cmpr&    s$    = = = = = r   eqnegegtleltc                 h    t          | t          j        t          j                  fd            }d S )Nc                      t                    | |||          }|                    ||d         |d                   S )Nr   r   )r&  select)r%   r   r&   r'   choicer   s        r   ptx_fp16_minmaxz*lower_fp16_minmax.<locals>.ptx_fp16_minmax  s>    "r""7GS$??~~fd1gtAw777r   r   )r  rt   r   r1  s     ` r   lower_fp16_minmaxr2    sA    
2u}em,,8 8 8 8 -,8 8 8r   maxmin
__nv_cbrtf	__nv_cbrtc                     |j         }t          |         }|                     |          }|j        }t	          j        ||g          }t          j        |||          }	|                    |	|          S r   )	return_type
cbrt_funcsr   rm   r   rn   r
   rp   rq   )
r%   r   r&   r'   r   rt   ftyru   rv   r  s
             r   ptx_cbrtr;    si     
BrNE

 
 
$
$C>D?3&&D		'dE	:	:B<<D!!!r   c           	          t          j        |j        t          j        t          j        d          t          j        d          f          d          }|                    ||          S )Nr   	__nv_brevr
   rp   rm   r   rn   r   rq   r%   r   r&   r'   r  s        r   ptx_brev_u4r@    sV    
 
	'

2B(9::
 
B <<D!!!r   c           	          t          j        |j        t          j        t          j        d          t          j        d          f          d          }|                    ||          S )Nr   __nv_brevllr>  r?  s        r   ptx_brev_u8rC  	  sV    
 
	'

2B(9::
 
B <<D!!!r   c                 v    |                     |d         |                     t          j        d                    S r9   )ctlzr   r	   booleanr$   s       r   ptx_clzrG    s4    <<QU]A..0 0 0r   c           	          t          j        |j        t          j        t          j        d          t          j        d          f          d          }|                    ||          S )Nr   __nv_ffsr>  r?  s        r   
ptx_ffs_32rJ    sV     
	'

2B(9::
 
B <<D!!!r   c           	          t          j        |j        t          j        t          j        d          t          j        d          f          d          }|                    ||          S )Nr   r   
__nv_ffsllr>  r?  s        r   
ptx_ffs_64rM  &  sV     
	'

2B(9::
 
B <<D!!!r   c                 <    |\  }}}|                     |||          S r   )r/  )r%   r   r&   r'   testabs          r   ptx_selprR  0  s#    JD!Q>>$1%%%r   c           	          t          j        |j        t          j        t          j                    t          j                    t          j                    f          d          }|                    ||          S )N
__nv_fmaxfr
   rp   rm   r   rn   r   rq   r?  s        r   
ptx_max_f4rV  6  `    		'
LNN\^^R\^^,	. 	. 	
 
B <<D!!!r   c           
         t          j        |j        t          j        t          j                    t          j                    t          j                    f          d          }|                    ||                     ||d         |j        d         t          j
                  |                     ||d         |j        d         t          j
                  g          S )N	__nv_fmaxr   r   r
   rp   rm   r   rn   r   rq   castr'   r	   doubler?  s        r   
ptx_max_f8r]  A       
	'
MOO]__bmoo.	0 	0 	
 
B <<Wd1gsx{ELAAWd1gsx{ELAA   r   c           	          t          j        |j        t          j        t          j                    t          j                    t          j                    f          d          }|                    ||          S )N
__nv_fminfrU  r?  s        r   
ptx_min_f4ra  R  rW  r   c           
         t          j        |j        t          j        t          j                    t          j                    t          j                    f          d          }|                    ||                     ||d         |j        d         t          j
                  |                     ||d         |j        d         t          j
                  g          S )N	__nv_fminr   r   rZ  r?  s        r   
ptx_min_f8rd  ]  r^  r   c           	      6   t          j        |j        t          j        t          j        d          t          j                    f          d          }|                    ||                     ||d         |j	        d         t          j                  g          S )Nr   __nv_llrintr   )r
   rp   rm   r   rn   r   r   rq   r[  r'   r	   r\  r?  s        r   	ptx_roundrg  n  s     
	'
JrNN]__	  	  	
 
B <<Wd1gsx{ELAA   r   c                 8    d }|                      ||||          S )Nc                    t          j        |           st          j        |           r| S |dk    r7|dk    rd|dz
  z  }d}nd|z  }d}| |z  |z  }t          j        |          r| S nd| z  }| |z  }t          |          }t          j        ||z
            dk    rdt          |dz            z  }|dk    r	||z  |z  }n||z  }|S )Nr      g      $@gMDg      ?g      ?g       @)mathisinfisnanroundfabs)r   ndigitspow1pow2r   r   s         r   round_ndigitsz$round_to_impl.<locals>.round_ndigits  s    :a== 	DJqMM 	Ha<<|| "-wTT!Az!}}  WH%DDA!HHIa!e##eAGnn$Aa<<TT!AAIAr   r  )r%   r   r&   r'   rs  s        r   round_to_implrt    s.      B ##G]CHHHr   c                       fd}|S )Nc                 z    |j         \  }|                     |          }|                    ||d                   S r9   )r'   r   fmul)r%   r   r&   r'   argtyfactorconsts         r   implzgen_deg_rad.<locals>.impl  s9    %%eU33||FDG,,,r   rD   )rz  r{  s   ` r   gen_deg_radr|    s#    - - - - - Kr   g     f@c                     |t           j        v rt          j        |d          }|g}n$t          j        |t          |                    } fdt          ||          D             }|j        }||k    rt          d|d|          |j	        t          |          k    r&t          d|j	        t          |          fz            ||fS )z4
    Convert integer indices into tuple of intp
    r   )rP   count)r~  c                 Z    g | ]'\  }}                     ||t          j                  (S rD   )r[  r	   intp)r^   tir   r%   s      r   r`   z&_normalize_indices.<locals>.<listcomp>  sA     0 0 01a ||GQ5:66 0 0 0r   zexpect z	 but got z#indexing %d-D array with %d-D index)
r	   integer_domainUniTupler
   unpack_tuplelenziprP   	TypeErrorndim)r%   r   indtyindsarytyvaltyindicesrP   s   ``      r   _normalize_indicesr    s     $$$U!444&&wCJJGGG0 0 0 0 0ug..0 0 0G KE~~i%%%?@@@zSZZ=SZZ01 2 2 	2 '>r   c                       fd}|S )Nc                     |j         \  }}}|\  }}}	|j        }
t          | |||||          \  }} |                     |          | ||          }t	          j        | ||||d          } | ||
||	          S )NT
wraparound)r'   rP   r  
make_arrayr
   get_item_pointer)r%   r   r&   r'   r  r  r  aryr  r   rP   r  laryptrdispatch_fns                 r   impz_atomic_dispatcher.<locals>.imp  s    !hueT3+GWeT,15: :w )w!!%(('3??&wg268 8 8 {7GUC===r   rD   )r  r  s   ` r   _atomic_dispatcherr    s#    > > > > > Jr   c                 6   |t           j        k    r1|j        }|                    t	          j        |          ||f          S |t           j        k    r1|j        }|                    t	          j        |          ||f          S |                    d||d          S )Nr  	monotonic)	r	   r   rm   rq   r   declare_atomic_add_float32r   declare_atomic_add_float64
atomic_rmwr%   r   rP   r  r   ru   s         r   ptx_atomic_add_tupler        
 ~||I@FF #J( ( 	(	%-		~||I@FF #J( ( 	( !!%c;???r   c                 6   |t           j        k    r1|j        }|                    t	          j        |          ||f          S |t           j        k    r1|j        }|                    t	          j        |          ||f          S |                    d||d          S )Nr  r  )	r	   r   rm   rq   r   declare_atomic_sub_float32r   declare_atomic_sub_float64r  r  s         r   ptx_atomic_subr    r  r   c                     |t           j        j        v rG|j        }|j        }t          t          d|           }|                     ||          ||f          S t          d| d          )Ndeclare_atomic_inc_intzUnimplemented atomic inc with  array	r   cudadeclunsigned_int_numba_typesr   rm   getattrr   rq   r  r%   r   rP   r  r   bwru   r  s           r   ptx_atomic_incr    t    
 666^~Y = = =>>||BBtHHsCj111FFFFGGGr   c                     |t           j        j        v rG|j        }|j        }t          t          d|           }|                     ||          ||f          S t          d| d          )Ndeclare_atomic_dec_intzUnimplemented atomic dec with r  r  r  s           r   ptx_atomic_decr    r  r   c                     t           fd            }t          j        t          j        t          j        fD ]1} t          | t          j        |t          j                  |           2d S )Nc                     |t           j        j        v r|                    ||d          S t	          d d| d          )Nr  zUnimplemented atomic z with r  r   r  integer_numba_typesr  r  )r%   r   rP   r  r   r   s        r   impl_ptx_atomicz+ptx_atomic_bitwise.<locals>.impl_ptx_atomic  sO    T]677%%b#sK@@@KBKKeKKKLLLr   )r  r	   r  r  Tupler   ArrayAny)stubr   r  r   s    `  r   ptx_atomic_bitwiser    s}    M M M M M z5>5;7 A A/dEKUY//@@@@A Ar   andorxorc                 ~    |t           j        j        v r|                    d||d          S t	          d| d          )Nxchgr  zUnimplemented atomic exch with r  r  )r%   r   rP   r  r   s        r   ptx_atomic_exchr  /  sH    
 233!!&#sK@@@G%GGGHHHr   c                    |j         }|t          j        k    r*|                    t	          j        |          ||f          S |t          j        k    r*|                    t	          j        |          ||f          S |t          j        t          j	        fv r|
                    d||d          S |t          j        t          j        fv r|
                    d||d          S t          d|z            Nr3  r  orderingumaxz&Unimplemented atomic max with %s array)rm   r	   r   rq   r   declare_atomic_max_float64r   declare_atomic_max_float32r   int64r  uint32uint64r  r  s         r   ptx_atomic_maxr  :      
 >D||I@FF #J( ( 	(	%-		||I@FF #J( ( 	(	5;,	,	,!!%cK!HHH	5<.	.	.!!&#s[!III@5HIIIr   c                    |j         }|t          j        k    r*|                    t	          j        |          ||f          S |t          j        k    r*|                    t	          j        |          ||f          S |t          j        t          j	        fv r|
                    d||d          S |t          j        t          j        fv r|
                    d||d          S t          d|z            Nr4  r  r  uminz&Unimplemented atomic min with %s array)rm   r	   r   rq   r   declare_atomic_min_float64r   declare_atomic_min_float32r   r  r  r  r  r  r  s         r   ptx_atomic_minr  N  r  r   c                    |j         }|t          j        k    r*|                    t	          j        |          ||f          S |t          j        k    r*|                    t	          j        |          ||f          S |t          j        t          j	        fv r|
                    d||d          S |t          j        t          j        fv r|
                    d||d          S t          d|z            r  )rm   r	   r   rq   r   declare_atomic_nanmax_float64r   declare_atomic_nanmax_float32r   r  r  r  r  r  r  s         r   ptx_atomic_nanmaxr  b      
 >D||ICDII #J( ( 	(	%-		||ICDII #J( ( 	(	5;,	,	,!!%cK!HHH	5<.	.	.!!&#s[!III@5HIIIr   c                    |j         }|t          j        k    r*|                    t	          j        |          ||f          S |t          j        k    r*|                    t	          j        |          ||f          S |t          j        t          j	        fv r|
                    d||d          S |t          j        t          j        fv r|
                    d||d          S t          d|z            r  )rm   r	   r   rq   r   declare_atomic_nanmin_float64r   declare_atomic_nanmin_float32r   r  r  r  r  r  r  s         r   ptx_atomic_nanminr  v  r  r   c                    |                     |j        d         t          j        |j        d         |j        d                   }|d         |                     t          j        d          |d         |d         f}t          | |||          S )Nr   r   rA   )r8  r'   r	   r  r   ptx_atomic_casr$   s       r   ptx_atomic_compare_and_swapr    sm    
//#(1+uz38A;
L
LCGW))%*a88$q'47KD'7C666r   c                    |j         \  }}}}|\  }}	}
}t          | |||	||          \  }} |                     |          | ||          }t          j        | ||||d          }|j        t          j        j        v r,|j	        }|j        j
        }t          j        |||||
|          S t          d|j        z            )NTr  z&Unimplemented atomic cas with %s array)r'   r  r  r
   r  rP   r   r  r  rm   r   r   atomic_cmpxchgr  )r%   r   r&   r'   r  r  oldtyr  r  r  oldr   r  r  r  ru   r   s                    r   r  r    s     "%E5%CsC'%u(-/ /NE7 %7e$$Wgs;;D

"7GUD'.24 4 4C {t}899~;''xc3OOO@5;NOOOr   c                     t          j        t          j        t          j                    t          j        d          g          ddd          }|d         }|                    ||g           d S )Nr   znanosleep.u32 $0;r   Tr   r   )r   r   rn   ro   r   rq   )r%   r   r&   r'   	nanosleepnss         r   ptx_nanosleepr    sf    R_R[]]RZ^^<LMM0#4I I II	aBLLRD!!!!!r   Fc           
          t          t          j        |d          }|dk    o|ot          |          dk    }|dk    r|st	          d           j        |         }	t          |t          j        t          j	        f          p)t          |	t          j                  p|t          j        k    }
|t          j        vr|
st          d|z                                 |          }t!          j        ||          }|t$          j        k    rt)          j        |||          }n|j        }t)          j        ||||          }                     |          }d|dz
                                  z  |_        |rd|_        n$t!          j        |t           j                  |_        |                    |t!          j         t!          j!        d                    d          }tE          j#        t%          j$                    j%                  }                     |          }|&                    |          }|}g }tO          tQ          |                    D ]\  }}|)                    |           ||z  } d	 tQ          |          D             } fd
|D             }|rt!          j*        t!          j+        t!          j!        d          g           ddd          }|,                    |-                    |g           t!          j!        d                    } .                    t          j/        |          }|0                    ||          g}n fd|D             }t          |          }t          j1        ||d          }  2                    |           |          } 3                    ||4                    ||j5        j6                  || .                    t          j/        |          d            |7                                S )Nr   r   zarray length <= 0zunsupported type: %srI   externalr   genericc                     g | ]}|S rD   rD   r]   s     r   r`   z"_generic_array.<locals>.<listcomp>  s    ---Qq---r   c                 P    g | ]"}                     t          j        |          #S rD   r   r	   r  r^   r_   r%   s     r   r`   z"_generic_array.<locals>.<listcomp>  s+    EEE$$UZ33EEEr   r   zmov.u32 $0, %dynamic_smem_size;r   Tr   r   c                 P    g | ]"}                     t          j        |          #S rD   r  r  s     r   r`   z"_generic_array.<locals>.<listcomp>  s+    EEE!'&&uz155EEEr   C)rP   r  layout)datarO   stridesitemsizememinfo)8r   operatorr  r  
ValueErrordata_model_manager
isinstancer	   RecordBooleanr   StructModelr   number_domainr  get_data_typer   	ArrayTyper   rf   r
   alloca_oncerm   add_global_variableget_abi_sizeof
bit_lengthalignlinkageConstant	UndefinedinitializeraddrspacecastPointerTyper   llcreate_target_dataNVVMdata_layoutget_abi_size	enumeratereversedappendr   rn   r   rq   r   r  udivr  r  populate_arrayr   r  type	_getvalue) r%   r   rO   rP   rQ   rR   rS   	elemcountdynamic_smem
data_modelother_supported_typelldtypelarytydataptrru   gvmemr  
targetdatar  
laststriderstridesr  lastsizer  kstridesget_dynshared_sizedynsmem_size	kitemsizekshaper  r  r  s    `                               r   rU   rU     s   x|UA..I >FlFs5zzQLA~~l~,--- +E2J55<788 	"j&"455	"EM! 
 E'''0D'.6777##E**G\'9--FD((( %gvKHHH~ +D&+,57 7 &&w// EAI22444 		B&EMM !#FBL A AE ''r~bjmm/L/L(13 3 &ty{{'>??J##E**G##J//H JH %11  8
###h

--(8,,---GEEEEWEEEH  F
  \"/"*R.."*M*M*K*.DB B B ||GLL1CR$H$H$&JrNN4 4 ((X>>	,,|Y778EEEEuEEE u::DKe$s;;;E
#'

U
#
#GW
5
5C3 ' G G!'#+$+$8$8X$N$N#'  ) ) ) ==??r   c                 *    |                                  S r   )rr   )r%   r   r   pyvals       r   cuda_dispatcher_constr.    s    ""$$$r   )F)	functoolsr   r  rk  llvmliter   llvmlite.bindingbindingr  numba.core.imputilsr   r   numba.core.typing.npydeclr   numba.core.datamodelr   
numba.corer	   r
   numba.npr   numba.np.npyimplr   cudadrvr   numbar   
numba.cudar   r   r   numba.cuda.typesr   r   registryr   lower_getattr
lower_attrlower_constantr   Moduler(   r,   r0   r4   r7   r<   r?   rB   rz  
array_liker  rE   rG   rK   sharedarrayIntegerLiteralr  rY   r  r  rc   localrg   rj   threadfence_blockrx   threadfence_systemr{   threadfencer}   syncwarpr   i4r   shfl_sync_intrinsicr   f4f8r   vote_sync_intrinsicrF  r   match_any_syncr   match_all_syncr   r   r   lanemask_ltr   popcr   r   r   r   r   Floatr   r   r   Integerr   r   r  r  haddr  iaddhsubr  isubhmulr  imulhnegr	  negr  habsr  absr  hfmar  truedivitruedivr  r  r&  heqr'  hner(  hger)  hgtr*  hler+  hltr,  r2  hmaxhminr   r   r9  cbrtr;  brevu4r@  u8rC  clzrG  ffsrJ  rM  selprR  r3  rV  r]  r4  ra  rd  rn  rg  rt  r|  pi_deg2rad_rad2degradiansdegreesr  r  atomicr  r  r  incr  decr  r  and_r   r  exchr  r  r  nanmaxr  nanminr  compare_and_swapr  casr  r  r  r  rU   r.  
get_ufuncsrD   r   r   <module>r     s                       4 4 4 4 4 4 4 4 1 1 1 1 1 1 ' ' ' ' ' ' % % % % % % % %       , , , , , ,             / / / / / / / / / / 1 1 1 1 1 1 1 18::#
(3 3 3 LEL,,+ + -,+ LEL
++, , ,+, LEL
++- - ,+- LEL	**. . +*. LEL))2 2 *)2 D#* * * D#* * * D#* * * tzek**  +* 3 3 3 t{%.	::- - ;:- t{%+uy11t{%.%)44- - 54 21- tz-uy99. . :9. tzei00tz33. . 43 10. u% %  % u  % % ! % u% % % u~A A A u~ux  % % ! % u %(EHehx u %(EHehx u %(EHehx u %(EHehx + +    +\ u %(EHemDD$ $ ED$ uUXux00uUXux00uUXux00uUXux00	- 	- 10 10 10 10	- uUXux00uUXux00uUXux00uUXux00- - 10 10 10 10- u( ( ( u( ( ( uz59" " " uy%)UY	22  32, , , EM5;''$ $ ('$ EK''$ $ ('$, , , EM5=))	$ 	$ *)	$ EM5=))E %-00
$ 
$ 10 *)
$' ' '  %*/5 ) ) )  (, & & &  (- ' ' '  %*/5 ) ) )  (, & & &  (- ' ' '  %*/5 ) ) )  (, & & &  (- ' ' ' uz&&# # '&# x|U]##6 6 $#6 uz&&# # '&# sEM6 6 6 uzu}emDD# # ED# x66x%-77B B 87 76B		 	 	 4ejnemU] 3 3MM$4G4G H H H 0hk5=%- 0 0t1D1D E E E 3ejnemU] 3 3MM$4G4G H H H 0hk5=%- 0 0t1D1D E E E 3ejnemU] 3 3MM$4G4G H H H 0hk5=%- 0 0t1D1D E E E 3ejnemU] 3 3MM$4G4G H H H 0hk5=%- 0 0t1D1D E E E 3ejnemU] 3 3MM$4G4G H H H 0hk5=%- 0 0t1D1D E E E 3ejnemU] 3 3MM$4G4G H H H 0hk5=%- 0 0t1D1D E E E8 8 8  %*/5$ / / /  %*/5$ / / / 
M<	M;
 uz5=!!uz5=!!" " "! "!" uz58" " " uz58" " " uy%)0 0 0 uy%(uy%(" "  " uy%(uy%(" "  " uz59ei33& & 43&
 sEHeh" "  " sEHehsEHehsEHeh        sEHeh" "  " sEHehsEHehsEHeh        uehueh	 	  	  ueh&&ueh&&"I "I '& '&"IJ   7T>$'> dlEH  kk(33 4 4 4 dlEH  kk(33 4 4 4 dlEH  kk(33 4 4 4 dlEH  kk(33 4 4 4  .  $ u|ej%)<<u|enei@@u|ek59==
@ 
@  >= A@ =<
@ u|ej%)<<u|enei@@u|ek59==
@ 
@  >= A@ =<
@ u|ej%)<<u|enei@@u|ek59==H H  >= A@ =<H u|ej%)<<u|enei@@u|ek59==H H  >= A@ =<H	A 	A 	A  5<$e , , ,  5<#T * * *  5<#U + + + u|%+uz59==u|%+u~uyAAu|%+u{EI>>I I  ?> BA >=I u|ej%)<<u|ek59==u|enei@@J J  A@ >= =<J  u|ej%)<<u|ek59==u|enei@@J J  A@ >= =<J  u|EKUY??u|EKei@@u|EKCCJ J  DC A@ @?J  u|EKUY??u|EKei@@u|EKCCJ J  DC A@ @?J  u|$ek59eiHH7 7 IH7 u|ej%)UYGGu|ek59eiHHu|eneiKKP P LK IH HGP* u%%" " &%" !&a a a aH % %  % ##%%u - - - - -r   