
    J/PhA                     h   d dl Z d dlmZ d dlmZ d dlmZ d dlm	Z	m
Z
mZmZmZmZmZ d dlmZ d dlmZ d dlmZmZ d dlmZ d d	lmZ d
dlmZ d dlmZmZmZ d dl m!Z!  G d dej                  Z" e j#        de j$                  Z% G d de          Z& G d de          Z' G d de          Z(dS )    N)cached_property)ir)cgutilsconfig	debuginfoitanium_manglertypestypingutils)
Dispatcher)BaseContext)BaseCallConvMinimalCallConv)	cmathdecl)	datamodel   )nvvm)codegen	nvvmutilsufuncs)cuda_data_managerc                   $     e Zd Zd Z fdZ xZS )CUDATypingContextc                    ddl m}m}m}m} ddlm}m} |                     |j	                   |                     |j	                   |                     |j	                   |                     t          j	                   |                     |j	                   |                     |j	                   |                     |j                   d S )Nr   )cudadeclcudamathlibdevicedeclvector_typesr   )enumdecl
cffi_utils) r   r   r   r   numba.core.typingr   r    install_registryregistryr   typing_registry)selfr   r   r   r   r   r    s          Q/var/www/html/test/jupyter/venv/lib/python3.11/site-packages/numba/cuda/target.pyload_additional_registriesz,CUDATypingContext.load_additional_registries   s    EEEEEEEEEEEE::::::::h/000j1222h/000i0111m4555h/000l:;;;;;    c                    ddl m} t          |t                    rt          ||          s	 |j        }n# t
          $ r |j        st          d          |j        	                                }d|d<   |
                    dd          |d<   |
                    dd          |d<    ||j        |          }||_        |}Y nw xY wt          t          |                               |          S )	Nr   )CUDADispatcherz<using cpu function on device but its compilation is disabledTdevicedebugFopt)numba.cuda.dispatcherr+   
isinstancer   _CUDATypingContext__dispatcherAttributeError_can_compile
ValueErrortargetoptionscopygetpy_funcsuperr   resolve_value_type)r&   valr+   r5   disp	__class__s        r'   r:   z$CUDATypingContext.resolve_value_type#   s#   888888sJ'' 	3//	&!   ' H$ &G H H H # 1 6 6 8 8*.h')6):):7E)J)Jg&'4'8'8'E'Ee$%~ck=AA $(  &--@@EEEs   6 B
CC)__name__
__module____qualname__r(   r:   __classcell__r=   s   @r'   r   r      sP        
< 
< 
<F F F F F F F F Fr)   r   z	[^a-z0-9]c                        e Zd ZdZdZd fd	Zed             Zed             Zd Z	d Z
d Zd	 Zed
             Zed             Zed             ZddddZ	 ddZd Zd Zd Zd Zd Zd Z xZS )CUDATargetContextTcudac                     t                                          ||           t          j        t          j                  | _        d S N)r9   __init__r   chainr   default_managerdata_model_manager)r&   	typingctxtargetr=   s      r'   rH   zCUDATargetContext.__init__G   s>    F+++"3"9%#
 #
r)   c                     t           j        S rG   )r   	DIBuilderr&   s    r'   rO   zCUDATargetContext.DIBuilderM   s    ""r)   c                     dS )NF rP   s    r'   enable_boundscheckz$CUDATargetContext.enable_boundscheckQ   s	     ur)   c                 6    | j                             |          S rG   )_internal_codegen_create_empty_module)r&   names     r'   create_modulezCUDATargetContext.create_moduleW   s    %::4@@@r)   c                 F    t          j        d          | _        d | _        d S )Nznumba.cuda.jit)r   JITCUDACodegenrU   _target_datarP   s    r'   initzCUDATargetContext.initZ   s$    !(!78H!I!I r)   c                    ddl m}m}m} ddl m}m}m} ddl m}m} ddl m	}	 ddl
m}
 ddlm} ddlm} d	d
lm}m}m}m}m} ddlm} |                     |j                   |                     |
j                   |                     |j                   |                     |j                   |                     |	j                   |                     |j                   |                     |j                   d S )Nr   )numberstupleobjslicing)rangeobj	iteratorsenumimpl)unicodecharseq)	cmathimpl)cffiimpl)arrayobj)
npdatetimer   )cudaimpl	printimpllibdeviceimplmathimplr   )ndarray)numba.cpythonr^   r_   r`   ra   rb   rc   rd   re   rf   
numba.miscrg   numba.nprh   ri   r!   rj   rk   rl   rm   r   numba.np.unsafern   r#   r$   impl_registry)r&   r^   r_   r`   ra   rb   rc   rd   re   rf   rg   rh   ri   rj   rk   rl   rm   r   rn   s                      r'   r(   z,CUDATargetContext.load_additional_registries^   s    	=<<<<<<<<<??????????22222222++++++''''''%%%%%%''''''	
 	
 	
 	
 	
 	
 	
 	
 	
 	
 	
 	
 	
 	
 	,+++++h/000h/000i0111m4555i0111h/000l899999r)   c                     | j         S rG   )rU   rP   s    r'   r   zCUDATargetContext.codegenv   s    %%r)   c                 |    | j         /t          j        t          j                    j                  | _         | j         S rG   )r[   llcreate_target_datar   NVVMdata_layoutrP   s    r'   target_datazCUDATargetContext.target_datay   s0    $ " 5dikk6M N ND  r)   c                 N    ddl m d}t          fd|D                       }|S )z
        Some CUDA intrinsics are at the module level, but cannot be treated as
        constants, because they are loaded from a special register in the PTX.
        These include threadIdx, blockDim, etc.
        r   rE   )	threadIdxblockDimblockIdxgridDimlaneidwarpsizec                 <    g | ]}t          j                  |fS rR   )r	   Module).0ncrE   s     r'   
<listcomp>z;CUDATargetContext.nonconst_module_attrs.<locals>.<listcomp>   s8     $9 $9 $9(* &+\$%7%7$< $9 $9 $9r)   )numbarE   tuple)r&   	nonconstsnonconsts_with_modrE   s      @r'   nonconst_module_attrsz'CUDATargetContext.nonconst_module_attrs   sZ     	!	" $9 $9 $9 $9.7$9 $9 $9 : :!!r)   c                      t          |           S rG   )CUDACallConvrP   s    r'   	call_convzCUDATargetContext.call_conv   s    D!!!r)   rR   Nabi_tagsuidc                2    t          j        ||||          S )Nr   )r   mangle)r&   rW   argtypesr   r   s        r'   manglerzCUDATargetContext.mangler   s%    %dHx*-/ / / 	/r)   c	           	         t          j        |j        d          }	|                                                     |j         d|	||          }
|
                    |           |                     |
||	||||          }|
|fS )a  
        Adapt a code library ``codelib`` with the numba compiled CUDA kernel
        with name ``fname`` and arguments ``argtypes`` for NVVM.
        A new library is created with a wrapper function that can be used as
        the kernel entry point for the given kernel.

        Returns the new code library and the wrapper function.

        Parameters:

        codelib:       The CodeLibrary containing the device function to wrap
                       in a kernel call.
        fndesc:        The FunctionDescriptor of the source function.
        debug:         Whether to compile with debug.
        lineinfo:      Whether to emit line info.
        nvvm_options:  Dict of NVVM options used when compiling the new library.
        filename:      The source filename that the function is contained in.
        linenum:       The source line that the function is on.
        max_registers: The max_registers argument for the code library.
        cudapyns_kernel_)
entry_namenvvm_optionsmax_registers)r   prepend_namespacellvm_func_namer   create_libraryrW   add_linking_librarygenerate_kernel_wrapper)r&   codelibfndescr-   lineinfor   filenamelinenumr   kernel_namelibrarywrappers               r'   prepare_cuda_kernelz%CUDATargetContext.prepare_cuda_kernel   s    . &7!h
 
 
 ,,..//7<0I0I0I;F=I>K 0 M M 	##G,,,..w/4h/68 8 r)   c                   !" |j         }|                     |          }	t          |	j                  }
t	          j        t	          j                    |
          }|                     d          "t	          j        t	          j        d          | j	        
                    t          j                  g|
z             }t	          j        "||j                  }t          j        |j        d          }t	          j        "||          !t	          j        !                    d                    }|s|rH|o| }|                     "|| |          } |j        !||j        ||            |j        ||           !"fd} |d          }g }g }d	D ]D}|                     |d
|z                       |                     |d|z                       E|	                    |!j                  }| j	                            ||t          j        ||          \  }}|rt9          j        ||j                  5  |                                 ddd           n# 1 swxY w Y   |                     |!                    |j"                            5  t	          j#        |j$        j%        d          }|&                    |||j'        dd          }|(                    |d          }tS          j*        |          }|                     |          5  tW          d	|          D ]0\  }}|,                    |          } |-                    | |           1tW          d	|          D ]0\  }}|.                    |          } |-                    | |           1	 ddd           n# 1 swxY w Y   ddd           n# 1 swxY w Y   |                                 t_          j0        !           |1                    "           |s|r |j2                     |2                                 tf          j4        rtk          j6        |"           |7                    !j                  S )z
        Generate the kernel wrapper in the given ``library``.
        The function being wrapped is described by ``fndesc``.
        The wrapper function is returned.
        zcuda.kernel.wrapper    r   r   r!   )modulefilepathcgctxdirectives_onlyc                     j         | z   }t          j        t          j        d          |          }t          j        |j        j        d           |_        |S )Nr   )	rW   r   add_global_variabler   IntTypeConstanttypepointeeinitializer)postfixrW   gvwrapfnwrapper_modules      r'   define_error_gvzBCUDATargetContext.generate_kernel_wrapper.<locals>.define_error_gv   sL    ;(D,^RZ^^-13 3B[$??BNIr)   __errcode__xyzz	__tid%s__z__ctaid%s__N	monotonicr   )8r   get_arg_packerlistargument_typesr   FunctionTypeVoidTyperX   r   r   get_return_typer	   pyobjectFunctionr   r   r   rW   	IRBuilderappend_basic_blockrO   mark_subprogramargsmark_locationappendfrom_argumentscall_functionvoidr   	if_likelyis_okret_voidif_thennot_is_python_excr   r   r   cmpxchgcodeextract_valuer   SRegBuilderziptidstorectaidr   set_cuda_kerneladd_ir_modulefinalizer   	DUMP_LLVMr   	dump_llvmget_function)#r&   r   r   r   r-   r   r   r   r   arginfoargtyswrapfntyfntyfuncprefixedbuilderr   r   r   gv_excgv_tidgv_ctaidicallargsstatus_oldxchgchangedsregdimptrr;   r   r   s#                                    @@r'   r   z)CUDATargetContext.generate_kernel_wrapper   s.    ?%%h//g,--?2;==&99++,ABBrz"~~ $ > >u~ N NO!' () ) {>41FGG"4TY8LLL^Xx@@,v88<<== 		6H 		6&4u9On08-17F ' H HI &I%V[(G   $I#GW555	 	 	 	 	 	 !// 	@ 	@AMM//+/::;;;OOOOMA,=>>????))'6;??N00T5:x; ;	  	0"7FL99 # #  """# # # # # # # # # # # # # # # f.B!C!CDD 0 0k&+"5t<<
 vsFK'2KA A!//a88 !,W55__W-- 0 0%(%7%7 0 0	S"hhsmmc3////%(%9%9 0 0	S"jjooc3////00 0 0 0 0 0 0 0 0 0 0 0 0 0 00 0 0 0 0 0 0 0 0 0 0 0 0 0 0, 	V$$$n--- 	!H 	!I    	4OFN333##FK000sJ   9II!IA=N6BNN6N#	#N6&N#	'N66N:=N:c           	          |j         } fdt          |                    d                    D             }t          j        t          j        d          t          |                    }t          j        ||          }t          j	        }t          j        ||j        d|          }	d|	_        d|	_        ||	_                             |j                  }
                     |
          }d	|d
z
                                  z  |	_        t          j        t          j        d                    }|                    |	|d          }                      |           |          } fd|j        D             } fd|j        D             }                     ||                    ||j        j                  |||j        |j        d           |                                S )i
        Unlike the parent version.  This returns a a pointer in the constant
        addrspace.
        c                 P    g | ]"}                     t          j        |          #S rR   )get_constantr	   byte)r   r   r&   s     r'   r   z9CUDATargetContext.make_constant_array.<locals>.<listcomp>  s;     
 
 
 ej!,,
 
 
r)   A)order   _cudapy_cmem	addrspaceinternalT   r   genericc                 P    g | ]"}                     t          j        |          #S rR   r   r	   intpr   sr&   s     r'   r   z9CUDATargetContext.make_constant_array.<locals>.<listcomp>6  s+    FFFq$##EJ22FFFr)   c                 P    g | ]"}                     t          j        |          #S rR   r
  r  s     r'   r   z9CUDATargetContext.make_constant_array.<locals>.<listcomp>7  s+    JJJD%%ej!44JJJr)   N)datashapestridesitemsizeparentmeminfo) r   itertobytesr   	ArrayTyper   lenr   r   ADDRSPACE_CONSTANTr   r   r   linkageglobal_constantr   get_data_typedtypeget_abi_sizeof
bit_lengthalignPointerTypeaddrspacecast
make_arrayr  r  populate_arraybitcastr  r  r  	_getvalue)r&   r   arytyarrlmod	constvals
constarytyconstaryr  r   lldtyper   ptrtygenptrarykshapekstridess   `                r'   make_constant_arrayz%CUDATargetContext.make_constant_array  s    ~
 
 
 
#++C+0011
 
 
	 \"*Q--Y@@
;z955+	(x}n3<> > >
!! $$U[11##G,,..000 rz!}}--&&r5)<< %dooe$$T733FFFFCIFFFJJJJckJJJCgoofchm&L&L"($,%(\#*$(	 	 	* 	* 	* }}r)   c                    t          j        |                    d          dz             }d                    dt	          j        |          g          }|j                            |          }|<t          j        ||j	        |t          j                  }d|_        d|_        ||_        |j	        j        j        }|                    |                    t          j                            S )	r   zutf-8    $__conststring__Nr  r  T)r   make_bytearrayencodejoinr   mangle_identifierglobalsr7   r   r   r   r  r  r  r   r   elementr%  
as_pointer)r&   modstringtextrW   r   chartys          r'   insert_const_stringz%CUDATargetContext.insert_const_string@  s    
 %fmmG&<&<w&FGGxx*(:6BBD E E [__T"":,S$)T7;7NP P PB#BJ!%B!BN (zz&++D,CDDEEEr)   c                     |j         }|                     ||          }t          j        t          j        d                    }|                    ||d          S )z
        Insert a constant string in the constant addresspace and return a
        generic i8 pointer to the data.

        This function attempts to deduplicate.
        r  r  )r   rC  r   r!  r   r"  )r&   r   r@  r)  r   	charptrtys         r'   insert_string_const_addrspacez/CUDATargetContext.insert_string_const_addrspaceV  sO     ~%%dF33N2:a==11	$$RI>>>r)   c                     dS )zRun O1 function passes
        NrR   r&   r   s     r'   optimize_functionz#CUDATargetContext.optimize_functionb  s	     	r)   c                 *    t          j        |          S rG   )r   get_ufunc_info)r&   	ufunc_keys     r'   rK  z CUDATargetContext.get_ufunc_infoo  s    $Y///r)   r|   rG   )r>   r?   r@   implement_powi_as_math_callstrict_alignmentrH   propertyrO   rS   rX   r\   r(   r   rz   r   r   r   r   r   r   r3  rC  rF  rI  rK  rA   rB   s   @r'   rD   rD   C   s       "&
 
 
 
 
 
 # # X#   X
A A A! ! !: : :0& & & ! ! X!
 " " _" " " _" 35$ / / / / / +/"  "  "  " HZ1 Z1 Z1x) ) )VF F F,
? 
? 
?  0 0 0 0 0 0 0r)   rD   c                       e Zd ZdS )r   N)r>   r?   r@   rR   r)   r'   r   r   s  s        Dr)   r   c                   P    e Zd ZdZd Zd Z	 	 ddZd Zd Zdd	Z	d
 Z
d Zd ZdS )CUDACABICallConvz
    Calling convention aimed at matching the CUDA C/C++ ABI. The implemented
    function signature is:

        <Python return type> (<Python arguments>)

    Exceptions are unsupported in this convention.
    c                     d S rG   rR   )r&   r   s     r'   _make_call_helperz"CUDACABICallConv._make_call_helper  s	     tr)   c                 ,    |                     |          S rG   )ret)r&   r   retvals      r'   return_valuezCUDACABICallConv.return_value  s    {{6"""r)   Nc                 $    d}t          |          )Nz7Python exceptions are unsupported in the CUDA C/C++ ABINotImplementedError)r&   r   excexc_argsloc	func_namemsgs          r'   return_user_excz CUDACABICallConv.return_user_exc  s    G!#&&&r)   c                 $    d}t          |          )Nz2Return status is unsupported in the CUDA C/C++ ABIrZ  )r&   r   r   r`  s       r'   return_status_propagatez(CUDACABICallConv.return_status_propagate  s    B!#&&&r)   c                     |                      |          }t          |j                  }t          j        |                     |          |          }|S )zM
        Get the LLVM IR Function type for *restype* and *argtypes*.
        )_get_arg_packerr   r   r   r   r   )r&   restyper   r   r   s        r'   get_function_typez"CUDACABICallConv.get_function_type  sK     &&x00.//t33G<<hGGr)   Fc                     |rJ |                      |          }|                    |                     |          d |D                        dS )zA
        Set names and attributes of function arguments.
        c                     g | ]}d |z   S )zarg.rR   )r   as     r'   r   z6CUDACABICallConv.decorate_function.<locals>.<listcomp>  s    777Qfqj777r)   N)re  assign_namesget_arguments)r&   fnr   fe_argtypesnoaliasr   s         r'   decorate_functionz"CUDACABICallConv.decorate_function  sf     &&{33T//3377$777	9 	9 	9 	9 	9r)   c                     |j         S )z@
        Get the Python-level arguments of LLVM *func*.
        )r   rH  s     r'   rl  zCUDACABICallConv.get_arguments  s     yr)   c                     |                      |          }|                    ||          }|                    ||          }d}	| j                            |||          }
|	|
fS )z3
        Call the Numba-compiled *callee*.
        N)re  as_argumentscallcontextget_returned_value)r&   r   calleerestyr   r   r   realargsr   r   outs              r'   r   zCUDACABICallConv.call_function  si     &&v..''66||FH-- l--gudCCs{r)   c                 J    | j         j        |                                         S rG   )ru  rK   r   )r&   tys     r'   r   z CUDACABICallConv.get_return_type  s    |.r2BBDDDr)   )NNN)F)r>   r?   r@   __doc__rT  rX  ra  rc  rg  rp  rl  r   r   rR   r)   r'   rR  rR  w  s           
# # # @D"&' ' ' '
' ' '  9 9 9 9    E E E E Er)   rR  ))re	functoolsr   llvmlite.bindingbindingrv   llvmliter   
numba.corer   r   r   r   r	   r
   r   numba.core.dispatcherr   numba.core.baser   numba.core.callconvr   r   r"   r   r   cudadrvr   
numba.cudar   r   r   numba.cuda.modelsr   r   compileIVALID_CHARSrD   r   rR  rR   r)   r'   <module>r     s2   				 % % % % % %            ' ' ' ' ' ' ' ' ' ' ' ' ' ' ' ' ' ' , , , , , , ' ' ' ' ' ' = = = = = = = = ' ' ' ' ' '                   1 1 1 1 1 1 1 1 1 1 / / / / / /$F $F $F $F $F* $F $F $FT bjrt,,m0 m0 m0 m0 m0 m0 m0 m0`		 	 	 	 	? 	 	 	AE AE AE AE AE| AE AE AE AE AEr)   