
    J/Ph>                     8   d dl mZ d dlmZ d dlmZmZmZmZm	Z	m
Z
 d dlmZmZmZmZmZmZ d dlmZ d dlmZmZmZ d dlmZ d dlmZmZmZ d d	lmZ d d
l m!Z! d dl"m#Z# d Z$ G d de          Z% G d de          Z&d Z' edd           G d de                      Z( edd           G d de                      Z) G d de          Z*e	 	 	 d)d            Z+d Z,e	 	 	 d*d             Z-	 	 	 d+d!Z.	 	 d,d#Z/	 	 	 d-d$Z0d% Z1d& Z2 G d' d(e3          Z4dS ).    )ir)ConcreteTemplate)typestypingfuncdescconfigcompilersigutils)sanitize_compile_result_entriesCompilerBaseDefaultPassBuilderFlagsOptionCompileResult)global_compiler_lock)LoweringPassPassManagerregister_pass)NumbaInvalidConfigWarning)IRLegalizationNativeLoweringAnnotateTypes)warn)get_current_device)CUDACABICallConvc                 <    | d S t          | t                    sJ | S N)
isinstancedict)xs    S/var/www/html/test/jupyter/venv/lib/python3.11/site-packages/numba/cuda/compiler.py_nvvm_options_typer"      s)    yt !T"""""    c                   F    e Zd Z eedd          Z eedd          ZdS )	CUDAFlagsNzNVVM options)typedefaultdoczCompute Capability)__name__
__module____qualname__r   r"   nvvm_optionstuplecompute_capability r#   r!   r%   r%      sQ        6  L
     r#   r%   c                   $    e Zd Zed             ZdS )CUDACompileResultc                      t          |           S r   )idselfs    r!   entry_pointzCUDACompileResult.entry_point9   s    $xxr#   N)r)   r*   r+   propertyr6   r/   r#   r!   r1   r1   8   s-          X  r#   r1   c                  8    t          |           } t          di | S )Nr/   )r   r1   )entriess    r!   cuda_compile_resultr:   >   s#    -g66G''w'''r#   TF)mutates_CFGanalysis_onlyc                       e Zd ZdZd Zd ZdS )CUDABackendcuda_backendc                 .    t          j        |            d S r   r   __init__r4   s    r!   rB   zCUDABackend.__init__H       d#####r#   c           
          |d         }t          j        |j        g|j        R  }t	          |j        |j        |j        j        |j	        |j
        |j        ||j                  |_        dS )zH
        Back-end: Packages lowering output in a compile result
        cr)typing_contexttarget_contexttyping_errortype_annotationlibrarycall_helper	signaturefndescT)r   rL   return_typeargsr:   	typingctx	targetctxstatusfail_reasonrI   rJ   rK   rM   rE   )r5   stateloweredrL   s       r!   run_passzCUDABackend.run_passK   sr     +$U%6DDDD	& ? ?1!1M+>	
 	
 	
 tr#   N)r)   r*   r+   _namerB   rV   r/   r#   r!   r>   r>   C   s9         E$ $ $    r#   r>   c                   "    e Zd ZdZdZd Zd ZdS )CreateLibraryz
    Create a CUDACodeLibrary for the NativeLowering pass to populate. The
    NativeLowering pass will create a code library if none exists, but we need
    to set it up with nvvm_options from the flags if they are present.
    create_libraryc                 .    t          j        |            d S r   rA   r4   s    r!   rB   zCreateLibrary.__init__i   rC   r#   c                     |j                                         }|j        j        }|j        j        }|                    ||          |_        |j                                         dS )N)r,   T)	rQ   codegenfunc_idfunc_qualnameflagsr,   rZ   rJ   enable_object_caching)r5   rT   r]   namer,   s        r!   rV   zCreateLibrary.run_passl   s\    /))++}*{/..t,.OO++---tr#   N)r)   r*   r+   __doc__rW   rB   rV   r/   r#   r!   rY   rY   _   sC          E$ $ $    r#   rY   c                       e Zd Zd Zd ZdS )CUDACompilerc                    t           }t          d          }|                    | j                  }|j                            |j                   |                    | j                  }|j                            |j                   |                     | j                  }|j                            |j                   |                                 |gS )Ncuda)	r   r   define_untyped_pipelinerT   passesextenddefine_typed_pipelinedefine_cuda_lowering_pipelinefinalize)r5   dpbpmuntyped_passestyped_passeslowering_passess         r!   define_pipelineszCUDACompiler.define_pipelinesx   s       44TZ@@
	.///00<<
	,---<<TZHH
	/000
tr#   c                 Z   t          d          }|                    t          d           |                    t          d           |                    t          d           |                    t
          d           |                    t          d           |                                 |S )Ncuda_loweringz$ensure IR is legal prior to loweringzannotate typeszcreate libraryznative loweringzcuda backend)r   add_passr   r   rY   r   r>   rm   )r5   rT   ro   s      r!   rl   z*CUDACompiler.define_cuda_lowering_pipeline   s    ))
N:	< 	< 	<
M#3444 	M#3444
N$5666
K000
	r#   N)r)   r*   r+   rs   rl   r/   r#   r!   re   re   w   s2               r#   re   Nc	                    |t          d          ddlm}	 |	j        }
|	j        }t                      }d|_        d|_        d|_        |s|rd|_	        |rd|_
        |rd|_        nd|_        |rd|_        |rd|_        |r||_        ||_        ddlm}  |d	          5  t%          j        |
|| |||i t(          
          }d d d            n# 1 swxY w Y   |j        }|                                 |S )Nz#Compute Capability must be supplied   cuda_targetTpythonnumpyr   )target_overriderg   )rP   rQ   funcrO   rN   r`   localspipeline_class)
ValueError
descriptorrz   rF   rG   r%   
no_compileno_cpython_wrapperno_cfunc_wrapper	debuginfodbg_directives_onlyerror_modelforceinlinefastmathr,   r.   numba.core.target_extensionr}   r	   compile_extrare   rJ   rm   )pyfuncrN   rO   debuglineinfoinliner   r,   ccrz   rP   rQ   r`   r}   cresrJ   s                   r!   compile_cudar      s    
z>???''''''*I*IKKEE#E!E    )$(! $$# !   *)!E <;;;;;		 	  C C%	09+1+/2=,1-/5AC C CC C C C C C C C C C C C C C C lGKs   !"CCCc                    |j                             |j         d||          }|                    |           |j        }|j        }t          |           }|                    ||          }	| j                            |j        |          }
| 	                    d          }t          j        ||
|j                  }t          j        ||	|          }t          j        |                    d                    }|                     |          }|                    ||j                  }| j                            |||||          \  }}|                    |           |                    |           |                                 |S )z
    Wrap a Numba ABI function in a C ABI wrapper at the NVVM IR level.

    The C ABI wrapper will have the same name as the source Python function.
    
_function_)
entry_namer,   zcuda.cabi.wrapper )r]   rZ   rb   add_linking_libraryargtypesrestyper   get_function_type	call_convcreate_moduler   Functionllvm_func_name	IRBuilderappend_basic_blockget_arg_packerfrom_argumentsrO   call_functionretadd_ir_modulerm   )contextlibrM   wrapper_function_namer,   rJ   r   r   c_call_convwrapfntyfntywrapper_moduler~   wrapfnbuilderarginfocallargs_return_values                      r!   cabi_wrap_functionr      s    k((CH)@)@)@4I6B ) D DG $$$ HnG"7++K,,Wh??H..v~xHHD **+>??N;~tV-BCCD
 [3HIIFl644R8899G$$X..G%%gv{;;H '55w(4 4OA|KK.)))Nr#   cptxc           
      b   |dvrt          d|           |dk    r|st          d          |
dvrt          d|
           |r |rd}t          t          |                     |
dk    }|	pt                      }	||rd	nd
d}|rd|d<   t	          j        |          \  }}|pt          j        }t          | |||||||          }|j	        j
        }|r!|s|t          j        k    rt          d          |j        }|rA|j        }|dk    r3|	                    d| j                  }t%          |||j        ||          }n=| j        }|j        }|j        }|                    |j        |j        |||||          \  }}|r|                    |          }n|                    |          }||fS )a  Compile a Python function to PTX or LTO-IR for a given set of argument
    types.

    :param pyfunc: The Python function to compile.
    :param sig: The signature representing the function's input and output
                types. If this is a tuple of argument types without a return
                type, the inferred return type is returned by this function. If
                a signature including a return type is passed, the compiled code
                will include a cast from the inferred return type to the
                specified return type, and this function will return the
                specified return type.
    :param debug: Whether to include debug info in the compiled code.
    :type debug: bool
    :param lineinfo: Whether to include a line mapping from the compiled code
                     to the source code. Usually this is used with optimized
                     code (since debug mode would automatically include this),
                     so we want debug info in the LLVM IR but only the line
                     mapping in the final output.
    :type lineinfo: bool
    :param device: Whether to compile a device function.
    :type device: bool
    :param fastmath: Whether to enable fast math flags (ftz=1, prec_sqrt=0,
                     prec_div=, and fma=1)
    :type fastmath: bool
    :param cc: Compute capability to compile for, as a tuple
               ``(MAJOR, MINOR)``. Defaults to ``(5, 0)``.
    :type cc: tuple
    :param opt: Enable optimizations. Defaults to ``True``.
    :type opt: bool
    :param abi: The ABI for a compiled function - either ``"numba"`` or
                ``"c"``. Note that the Numba ABI is not considered stable.
                The C ABI is only supported for device functions at present.
    :type abi: str
    :param abi_info: A dict of ABI-specific options. The ``"c"`` ABI supports
                     one option, ``"abi_name"``, for providing the wrapper
                     function's name. The ``"numba"`` ABI has no options.
    :type abi_info: dict
    :param output: Type of output to generate, either ``"ptx"`` or ``"ltoir"``.
    :type output: str
    :return: (code, resty): The compiled code and inferred return type
    :rtype: tuple
    )numbar   zUnsupported ABI: r   z&The C ABI is not supported for kernels)r   ltoirzUnsupported output type: z{debug=True with opt=True (the default) is not supported by CUDA. This may result in a crash - set debug=False or opt=False.r      r   )r   optNzgen-lto)r   r   r   r,   r   z'CUDA kernel must have void return type.abi_name)r   )NotImplementedErrorr   r   r   r
   normalize_signaturer   CUDA_DEFAULT_PTX_CCr   rL   rN   r   void	TypeErrorrG   rJ   getr)   r   rM   __code__co_filenameco_firstlinenoprepare_cuda_kernel	get_ltoirget_asm_str)r   sigr   r   devicer   r   r   abiabi_infooutputmsgltor,   rO   rN   r   restytgtr   wrapper_namecodefilenamelinenumkernels                            r!   compiler      s1   \ .  !";c";";<<<
czz&z!"JKKK%%%!"Ff"F"FGGG - -2 	&s++,,,WC!466H qqQ L
  '"&Y 4S99D+		)v)BT!)H%1b: : :D N&E CV C 3 3ABBB

C 7l#::#<<
FODDL$S#t{L%13 3C #%--dlDK.6h.57 7V  &}}}##"%%;r#   c
                 \    t                      j        }
t          | ||||||
||||	          S )zCompile a Python function to PTX or LTO-IR for a given signature for the
    current device's compute capabilility. This calls :func:`compile` with an
    appropriate ``cc`` value for the current device.	r   r   r   r   r   r   r   r   r   )r   r.   r   )r   r   r   r   r   r   r   r   r   r   r   s              r!   compile_for_current_devicer   j  sA     
			0B63ehv$#$V5 5 5 5r#   r   c
                 6    t          | |||||||||	d          S )zCompile a Python function to PTX for a given signature. See
    :func:`compile`. The defaults for this function are to compile a kernel
    with the Numba ABI, rather than :func:`compile`'s default of compiling a
    device function with the C ABI.r   r   )r   )
r   r   r   r   r   r   r   r   r   r   s
             r!   compile_ptxr   v  s3     63ehv$#$U4 4 4 4r#   c	                 Z    t                      j        }	t          | ||||||	|||
  
        S )zCompile a Python function to PTX for a given signature for the current
    device's compute capabilility. See :func:`compile_ptx`.)r   r   r   r   r   r   r   r   )r   r.   r   )
r   r   r   r   r   r   r   r   r   r   s
             r!   compile_ptx_for_current_devicer     s?    
 
			0Bvs%($xBC3 3 3 3r#   c                 .    t          | ||          j        S r   ) declare_device_function_templatekeyrb   r   r   s      r!   declare_device_functionr     s    +D'8DDHHr#   c                 (  	 ddl m} |j        }|j        }t	          j        |g|R  	t          | 	           G 	fddt                    }t          j	        | ||          }|
                    |           |
                    |           |S )Nrx   ry   c                       e Zd Z ZgZdS )Bdeclare_device_function_template.<locals>.device_function_templateN)r)   r*   r+   r   cases)extfnr   s   r!   device_function_templater     s        r#   r   r   )r   rz   rF   rG   r   rL   ExternFunctionr   r   ExternalFunctionDescriptorinsert_user_function)
rb   r   r   rz   rP   rQ   r   rM   r   r   s
           @@r!   r   r     s    ''''''*I*I

7
.X
.
.
.C4%%E       #3    07X7 7 7F""5*BCCC""5&111##r#   c                       e Zd Zd ZdS )r   c                 "    || _         || _        d S r   )rb   r   )r5   rb   r   s      r!   rB   zExternFunction.__init__  s    	r#   N)r)   r*   r+   rB   r/   r#   r!   r   r     s#            r#   r   )FFFFNN)	FFTFNTr   Nr   )FFTFTr   Nr   )FFFFNTr   N)FFFFTr   N)5llvmliter   numba.core.typing.templatesr   
numba.corer   r   r   r   r	   r
   numba.core.compilerr   r   r   r   r   r   numba.core.compiler_lockr   numba.core.compiler_machineryr   r   r   numba.core.errorsr   numba.core.typed_passesr   r   r   warningsr   numba.cuda.apir   numba.cuda.targetr   r"   r%   r1   r:   r>   rY   re   r   r   r   r   r   r   r   r   objectr   r/   r#   r!   <module>r      s         8 8 8 8 8 8 J J J J J J J J J J J J J J J J0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 : 9 9 9 9 9G G G G G G G G G G 7 7 7 7 7 74 4 4 4 4 4 4 4 4 4       - - - - - - . . . . . .  
 
 
 
 
 
 
 
:       ( ( (
 4u555    ,   656 5666    L   76.    <   B BG<@7 7 7 7t) ) )X =AAEg g g gT CH@D>C	5 	5 	5 	5 BGIM4 4 4 4 GLEI9=3 3 3 3I I I$ $ $&    V     r#   