
    J/Ph/                         d dl mZ d dlmZmZ d dlmZmZ ddlm	Z	m
Z
mZmZ d dlmZ d dlZd dlZd dlZdZd	 Zd
 Zd Z G d dej        e          Z G d de          ZdS )    )ir)config	serialize)CodegenCodeLibrary   )devicesdrivernvvmruntime)get_cudalibNznvptx64-nvidia-cudac                 R   d }d }	 t          j                    \  }}t          |d          5 }|                    |            d d d            n# 1 swxY w Y   	 t	          j        dg||dt          j        t          j                  }n$# t          $ r}d}t          |          |d }~ww xY w|j	        
                    d          |t          j        |           |t          j        |           S S # |t          j        |           |t          j        |           w w xY w)NwbnvdisasmT)checkstdoutstderrztnvdisasm has not been found. You may need to install the CUDA toolkit and ensure that it is available on your PATH.
zutf-8)tempfilemkstempopenwrite
subprocessrunPIPEFileNotFoundErrorRuntimeErrorr   decodeoscloseunlink)cubinflagsfdfnamefcpemsgs           R/var/www/html/test/jupyter/venv/lib/python3.11/site-packages/numba/cuda/codegen.pyrun_nvdisasmr*      s    
BE$&&	E% 	!GGENNN	 	 	 	 	 	 	 	 	 	 	 	 	 	 		+ ;e ;U ;4'1'18 8 8BB ! 	+ 	+ 	+5C s##*		+
 y((>HRLLLIe  >HRLLLIe sR   &C7 AC7 AC7 AC7 1B C7 
B-B((B--C7 7/D&c                 (    dg}t          | |          S )Nz-gir*   r!   r"   s     r)   disassemble_cubinr.   +   s    GEu%%%    c                 (    dg}t          | |          S )Nz-cfgr,   r-   s     r)   disassemble_cubin_for_cfgr1   1   s    HEu%%%r/   c                        e Zd ZdZ	 	 d fd	Zed             Zd Zd ZddZ	ddZ
dd	Zd
 Zd ZddZddZd Zd Zd Zd Zed             Zed             Zd Zd Zed             Z xZS )CUDACodeLibraryz
    The CUDACodeLibrary generates PTX, SASS, cubins for multiple different
    compute capabilities. It also loads cubins to multiple devices (via
    get_cufunc), which may be of different compute capabilities.
    Nc                 :   t                                          ||           d| _        t                      | _        t                      | _        d| _        d| _        i | _        i | _	        i | _
        i | _        i | _        || _        |i }|| _        || _        dS )a  
        codegen:
            Codegen object.
        name:
            Name of the function in the source.
        entry_name:
            Name of the kernel function in the binary, if this is a global
            kernel and not a device function.
        max_registers:
            The maximum register usage to aim for when linking.
        nvvm_options:
                Dict of options to pass to NVVM.
        NF)super__init___moduleset_linking_libraries_linking_filesneeds_cudadevrt
_llvm_strs
_ptx_cache_ltoir_cache_cubin_cache_linkerinfo_cache_cufunc_cache_max_registers_nvvm_options_entry_name)selfcodegenname
entry_namemax_registersnvvm_options	__class__s         r)   r6   zCUDACodeLibrary.__init__>   s     	$''' 
 #&%% "ee$ !#+L)%r/   c                 J    | j         d | j        D             | _         | j         S )Nc                 ,    g | ]}t          |          S  )str).0mods     r)   
<listcomp>z-CUDACodeLibrary.llvm_strs.<locals>.<listcomp>r   s    @@@Cs3xx@@@r/   )r<   modulesrE   s    r)   	llvm_strszCUDACodeLibrary.llvm_strso   s)    ?"@@4<@@@DOr/   c                 6    d                     | j                  S )Nz

)joinrU   rT   s    r)   get_llvm_strzCUDACodeLibrary.get_llvm_stru   s    {{4>***r/   c                 H    ||S t          j                    j        }|j        S N)r	   get_contextdevicecompute_capability)rE   ccr\   s      r)   
_ensure_cczCUDACodeLibrary._ensure_ccx   s&    >I$&&-((r/   c                 (   |                      |          }| j                            |d           }|r|S t          j        | }| j                                        }||d<   | j        }t          j        |fi |}|	                                
                    d          
                                }t          j        rIt          d| j        z                      dd                     t          |           t          d           || j        |<   |S )Narch zASSEMBLY %sP   -zP================================================================================)r_   r=   getr   get_arch_optionrC   copyrU   
compile_irr   stripr   DUMP_ASSEMBLYprint_namecenter)rE   r^   ptxesra   optionsirsptxs          r)   get_asm_strzCUDACodeLibrary.get_asm_str   s    __R  ##B-- 	L#R($))++noc--W--
 jjll  ((..00 	=4:-55b#>>???#JJJ(OOO!
r/   c                    |                      |          }| j                            |d           }||S t          j        | }| j                                        }||d<   d |d<   | j        }t          j        |fi |}|| j        |<   |S )Nra   zgen-lto)	r_   r>   re   r   rf   rC   rg   rU   rh   )rE   r^   ltoirra   ro   rp   s         r)   	get_ltoirzCUDACodeLibrary.get_ltoir   s    __R  !%%b$//L#R($))++!	n//w// %"r/   c                    |                      |          }| j                            |d           }|r|S t          j                            | j        |          }|j        r,|                     |          }|	                    |           n=| 
                    |          }|                    |                                           | j        D ]}|                    |           | j        r$|                    t!          dd                     |                                }|| j        |<   |j        | j        |<   |S )N)rI   r^   r^   	cudadevrtT)static)r_   r?   re   r
   LinkernewrB   ltoru   	add_ltoirrr   add_ptxencoder:   add_file_guess_extr;   r   completeinfo_logr@   )rE   r^   r!   linkerrt   rq   paths          r)   	get_cubinzCUDACodeLibrary.get_cubin   s?   __R  !%%b$// 	L""1D"LL: 	)NNbN))EU####""b"))CNN3::<<(((' 	, 	,D%%d++++ 	M%%k+d&K&K&KLLL!! %"%+_r"r/   c                 d   | j         d}t          |          t          j                    }|j        }| j                            |j        d           }|r|S |                     |j	                  }|
                    |          }|                    | j                   }|| j        |j        <   |S )NzLMissing entry_name - are you trying to get the cufunc for a device function?rw   )rD   r   r	   r[   r\   rA   re   idr   r]   create_module_imageget_function)rE   r(   ctxr\   cufuncr!   modules          r)   
get_cufunczCUDACodeLibrary.get_cufunc   s    #+Cs###!###''	488 	M&";<<((// $$T%566 )/69%r/   c                 ^    	 | j         |         S # t          $ r t          d|           w xY w)NzNo linkerinfo for CC )r@   KeyErrorrE   r^   s     r)   get_linkerinfozCUDACodeLibrary.get_linkerinfo   sG    	9)"-- 	9 	9 	97277888	9s    ,c                 H    t          |                     |                    S Nrw   )r.   r   r   s     r)   get_sasszCUDACodeLibrary.get_sass   s     2!6!6777r/   c                 H    t          |                     |                    S r   )r1   r   r   s     r)   get_sass_cfgzCUDACodeLibrary.get_sass_cfg   s    (2)>)>???r/   c                 h    |                                   | j        t          d          || _        d S )Nz(CUDACodeLibrary only supports one module)_raise_if_finalizedr7   r   )rE   rQ   s     r)   add_ir_modulezCUDACodeLibrary.add_ir_module   s6      """<#IJJJr/   c                     |                                  |                                  | j                            |           d S rZ   )_ensure_finalizedr   r9   add)rE   librarys     r)   add_linking_libraryz#CUDACodeLibrary.add_linking_library   sE    !!###
 	  """##G,,,,,r/   c                 :    | j                             |           d S rZ   )r:   r   )rE   filepaths     r)   add_linking_filez CUDACodeLibrary.add_linking_file   s    )))))r/   c                 d    | j         j        D ]}|j        |k    r|c S t          d| d          )Nz	Function z
 not found)r7   	functionsrG   r   )rE   rG   fns      r)   r   zCUDACodeLibrary.get_function   sI    ,( 	 	Bw$			 34333444r/   c                 6    | j         gd | j        D             z   S )Nc                 &    g | ]}|j         D ]}|S rN   )rS   )rP   librQ   s      r)   rR   z+CUDACodeLibrary.modules.<locals>.<listcomp>  s>     !9 !9 !9,/K!9 !9%( "% !9 !9 !9 !9r/   )r7   r9   rT   s    r)   rS   zCUDACodeLibrary.modules  s1    ~ !9 !90G !9 !9 !9 9 	9r/   c                 |    g }| j         D ]1}|                    |j                   |                    |           2|S rZ   )r9   extendlinking_librariesappend)rE   libsr   s      r)   r   z!CUDACodeLibrary.linking_libraries  sJ    
 * 	 	CKK-...KKr/   c                     |                                   | j        D ]$}|j        D ]}|j        D ]}|j        sd|_        %d| _        d S )Nlinkonce_odrT)r   r9   rS   r   is_declarationlinkage
_finalized)rE   r   rQ   r   s       r)   finalizezCUDACodeLibrary.finalize  sv     	  """ . 	4 	4G 4 4- 4 4B, 4%3
44
 r/   c                     | j         rd}t          |          | j        st          d          t          d| j        | j        | j        | j        | j        | j	        | j
        | j        | j        
  
        S )z
        Reduce the instance for serialization. We retain the PTX and cubins,
        but loaded functions are discarded. They are recreated when needed
        after deserialization.
        z0Cannot pickle CUDACodeLibrary with linking filesz)Cannot pickle unfinalized CUDACodeLibraryN)
rF   rG   rH   rU   	ptx_cachecubin_cachelinkerinfo_cacherI   rJ   r;   )r:   r   r   dictrG   rD   rU   r=   r?   r@   rB   rC   r;   )rE   r(   s     r)   _reduce_stateszCUDACodeLibrary._reduce_states0  s      	$DCs### 	LJKKK'no)!3-+ 0
 
 
 	
r/   c                      | |||          }||_         ||_        ||_        ||_        ||_        |	|_        |
|_        d|_        |S )z&
        Rebuild an instance.
        )rH   T)r<   r=   r?   r@   rB   rC   r;   r   )clsrF   rG   rH   rU   r   r   r   rI   rJ   r;   instances               r)   _rebuildzCUDACodeLibrary._rebuildH  s_     3w<<<'' +%5""/!-#2 "r/   )NNNrZ   )__name__
__module____qualname____doc__r6   propertyrU   rX   r_   rr   ru   r   r   r   r   r   r   r   r   r   rS   r   r   r   classmethodr   __classcell__)rK   s   @r)   r3   r3   7   s         FJ"/& /& /& /& /& /&b   X
+ + +) ) )   :   $   6  09 9 98 8 8 8@ @ @ @  - - -* * *5 5 5 9 9 X9   X  4
 
 
0   [    r/   r3   c                   .    e Zd ZdZeZd Zd Zd Zd Z	dS )JITCUDACodegenz
    This codegen implementation for CUDA only generates optimized LLVM IR.
    Generation of PTX code is done separately (see numba.cuda.compiler).
    c                     d S rZ   rN   )rE   module_names     r)   r6   zJITCUDACodegen.__init__g      r/   c                     t          j        |          }t          |_        t	          j                    j        |_        t	          j        |           |S rZ   )r   ModuleCUDA_TRIPLEtripler   NVVMdata_layoutadd_ir_version)rE   rG   	ir_modules      r)   _create_empty_modulez#JITCUDACodegen._create_empty_modulej  s?    IdOO	&	 $	 7	I&&&r/   c                     d S rZ   rN   )rE   r   s     r)   _add_modulezJITCUDACodegen._add_moduleq  r   r/   c                     t          j                    }|j        j        }t          j                                        |fS )zP
        Return a tuple unambiguously describing the codegen behaviour.
        )r	   r[   r\   r]   r   get_version)rE   r   r^   s      r)   magic_tuplezJITCUDACodegen.magic_tuplet  s5     !##Z*++--r22r/   N)
r   r   r   r   r3   _library_classr6   r   r   r   rN   r/   r)   r   r   _  sa         
 %N      3 3 3 3 3r/   r   )llvmliter   
numba.corer   r   numba.core.codegenr   r   cudadrvr	   r
   r   r   numba.cuda.cudadrv.libsr   r   r   r   r   r*   r.   r1   ReduceMixinr3   r   rN   r/   r)   <module>r      s;         ( ( ( ( ( ( ( ( 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 / / / / / / 				      $  6& & && & &e e e e ei+[ e e eP	3 3 3 3 3W 3 3 3 3 3r/   