
    J/PhV                       d Z ddlZddlZddlZddlZddlZddlZddlZddlZddl	Z	ddl
Z
ddlmZ ddlmZmZ ddlmZmZmZmZmZmZmZmZmZ ddlZddlZddlZddlmZmZ ddl m!Z! ddl"m#Z#m$Z$m%Z% d	d
l&m'Z'm(Z( d	dl)m*Z* d	dl)m+Z+m,Z,m-Z- ddl.m/Z/m)Z)m0Z0m1Z1 e%j2        Z3e3rddl4m4Z5 dZ6dZ7ej8        9                    d          Z:ej;        j<        Z=ej;        j>        Z?ej@        ge=_A        ej@        ge?_A        d ZB G d deC          ZD G d deC          ZE G d de(          ZFd ZGd ZHd ZIdZJdZKd ZLd ZMd ZNd  ZO eN            ZP G d! d"eQ          ZR G d# d$eQ          ZS eR            ZTd% ZU eU            ZV G d& d'eQ          ZWd( ZX G d) d*eQe+          ZY G d, d-eY          ZZ G d. d/          Z[ G d0 d1e[eZ          Z\d	Z]da^d2 Z_d3 Z` G d4 d5ea          Zb eb            Zb G d6 d7eQ          Zc ed8d9          Zd	  G d: d;eQ          Zed< Zfd= Zgd> Zhd? Zid@ ZjdA ZkdB ZldC ZmdD Zn G dE dFeQ          Zo G dG dHeQ          Zp G dI dJeQ          Zq G dK dLeQ          Zr G dM dNer          Zs G dO dPes          Zt G dQ dRe!ju                  Zv G dS dTes          Zw G dU dVeQ          Zx G dW dXexe!ju                  Zy G dY dZexe!ju                  Zz G d[ d\eQ          Z{ G d] d^eQ          Z|d_ Z} G d` dae+          Z~ G db dce~          Z G dd dee~          Z edfg dg          Z G dh die+          Z G dj dke          Z G dl dme          Z	 ddoZe3r/e5j        Zej        ej        ej        ej        ej        ej        dpZn'e/j        e/j        e/j        e/j        e/j        e/j        dpZ G dq dre+          ZdsZ G dt due          Z G dv dwe          Z G dx dye          Zdz Zd{ Zd| Zd} Zd~ ZddZd Zd Zd Zd Zd Zd Zd Zd ZddZddZddZddZd Zd Zej        d             Zd ZdS )a  
CUDA driver bridge implementation

NOTE:
The new driver implementation uses a *_PendingDeallocs* that help prevents a
crashing the system (particularly OSX) when the CUDA context is corrupted at
resource deallocation.  The old approach ties resource management directly
into the object destructor; thus, at corruption of the CUDA context,
subsequent deallocation could further corrupt the CUDA context and causes the
system to freeze in some cases.

    N)product)ABCMetaabstractmethod)	c_intbyrefc_size_tc_charc_char_p	addressofc_void_pc_floatc_uint)
namedtupledeque)mviewbuf)utils	serializeconfig   )CudaSupportErrorCudaDriverError)API_PROTOTYPES)cu_occupancy_b2d_sizecu_stream_callback_pyobjcu_uuid)enumsdrvapinvrtc_extrascuda)      linuxc                  p   t          j        t                    } |                                 st	          t
          j                                                  }t          t           |d           }t          |t                    st           j        }|                     |           t
          j        r^t          j        t          j                  }d}|                    t          j        |                     |                     |           n&|                     t          j                               | S )Nz;== CUDA [%(relativeCreated)d] %(levelname)5s -- %(message)s)fmt)logging	getLogger__name__hasHandlersstrr   CUDA_LOG_LEVELuppergetattr
isinstanceintCRITICALsetLevelStreamHandlersysstderrsetFormatter	Formatter
addHandlerNullHandler)loggerlvlhandlerr&   s       Y/var/www/html/test/jupyter/venv/lib/python3.11/site-packages/numba/cuda/cudadrv/driver.pymake_loggerr>   ;   s    x((F 5&'((..00gsD))#s## 	#"C  	5+CJ77GOC  !2s!;!;!;<<<g&&&& g133444M    c                       e Zd ZdS )DeadMemoryErrorNr)   
__module____qualname__ r?   r=   rA   rA   S           Dr?   rA   c                       e Zd ZdS )LinkerErrorNrB   rE   r?   r=   rH   rH   W   rF   r?   rH   c                   $     e Zd Z fdZd Z xZS )CudaAPIErrorc                 v    || _         || _        t          t          |                               ||           d S N)codemsgsuperrJ   __init__)selfrM   rN   	__class__s      r=   rP   zCudaAPIError.__init__\   s7    	lD!!**455555r?   c                 &    d| j         d| j        S )N[z] )rM   rN   rQ   s    r=   __str__zCudaAPIError.__str__a   s      IIItxx00r?   )r)   rC   rD   rP   rV   __classcell__rR   s   @r=   rJ   rJ   [   sG        6 6 6 6 6
1 1 1 1 1 1 1r?   rJ   c                  (   t           j        } | dk    rt                       t          j        dk    rt
          j        }dg}dg}n7t          j        dk    rt
          j        }dg}dg}nt
          j        }dd	g}d
dg}| rv	 t          j	        
                    |           } n # t          $ r t          d| z            w xY wt          j	                            |           st          d| z            | g}n|d t          ||          D             z   }||fS )N0win32z\windows\system32z
nvcuda.dlldarwinz/usr/local/cuda/libzlibcuda.dylibz/usr/libz
/usr/lib64z
libcuda.sozlibcuda.so.1z(NUMBA_CUDA_DRIVER %s is not a valid pathzoNUMBA_CUDA_DRIVER %s is not a valid file path.  Note it must be a filepath of the .so/.dll/.dylib or the driverc                 T    g | ]%\  }}t           j                            ||          &S rE   )ospathjoin).0xys      r=   
<listcomp>z,locate_driver_and_loader.<locals>.<listcomp>   sB      E  E  E$(Aq !#Q 2 2  E  E  Er?   )r   CUDA_DRIVER_raise_driver_not_foundr4   platformctypesWinDLLCDLLr^   r_   abspath
ValueErrorisfiler   )envpathdlloaderdldirdlnames
candidatess        r=   locate_driver_and_loaderrs   e   sp    G#~~!!! |w=&'.		!	!;&'"# ;\*0 E	&goog..GG 	& 	& 	&G$% & & &	& w~~g&& 	D 9;BC D D D Y

   E  E,3E7,C,C E  E  E E
 Zs   >B B;c                    g }g }|D ]r}	  | |          }||fc S # t           $ rR}|                    t          j                            |                      |                    |           Y d }~kd }~ww xY wt          |          rt                       d S d                    d |D                       }t          |           d S )N
c              3   4   K   | ]}t          |          V  d S rL   r+   )ra   es     r=   	<genexpr>zload_driver.<locals>.<genexpr>   s(      ==a3q66======r?   )	OSErrorappendr^   r_   rm   allrf   r`   _raise_driver_error)ro   rr   path_not_existdriver_load_errorr_   dllrx   errmsgs           r=   load_driverr      s    N  	(4..C 9  	( 	( 	(!!bgnnT&:&:":;;;$$Q''''''''	( > $!!!!!==+<=====F#####s   
A7AA22A7c                  N    t                      \  } }t          | |          \  }}|S rL   )rs   r   )ro   rr   r   r_   s       r=   find_driverr      s*    355HjHj11ICJr?   z
CUDA driver library cannot be found.
If you are sure that a CUDA driver is installed,
try setting environment variable NUMBA_CUDA_DRIVER
with the file path of the CUDA driver shared library.
zM
Possible CUDA driver libraries are found but error occurred during load:
%s
c                  *    t          t                    rL   )r   DRIVER_NOT_FOUND_MSGrE   r?   r=   rf   rf      s    
/
0
00r?   c                 0    t          t          | z            rL   )r   DRIVER_LOAD_ERROR_MSG)rx   s    r=   r}   r}      s    
014
5
55r?   c                      d} t          j                    }t          t                    D ]1}|                    |           rt          t          |          }|||<   2|S )N
CUDA_ERROR)r   
UniqueDictdirr   
startswithr.   )prefixmapnamerM   s       r=   _build_reverse_error_mapr      s\    F



CE

  ??6"" 	5$''DCIJr?   c                  (    t          j                    S rL   )r^   getpidrE   r?   r=   _getpidr      s    9;;r?   c                       e Zd ZdZdZd Zd Zd Zd Ze	d             Z
d Zdd	Zd
 Zd Zd Zd Zd ZddZd Zd Zd Zd Zd Zd ZdS )Driverz0
    Driver API functions are lazily bound.
    Nc                 ^    | j         }||S t                              |           }|| _         |S rL   )
_singletonobject__new__)clsobjs     r=   r   zDriver.__new__   s0    n?J..%%C CN
r?   c                    t          j                    | _        d| _        d | _        d | _        	 t          j        rd}t          |          t                      | _
        d S # t          $ r}d| _        |j        | _        Y d }~d S d }~ww xY w)NFzzCUDA is disabled due to setting NUMBA_DISABLE_CUDA=1 in the environment, or because CUDA is unsupported on 32-bit systems.T)r   r   devicesis_initializedinitialization_errorpidr   DISABLE_CUDAr   r   librN   )rQ   rN   rx   s      r=   rP   zDriver.__init__   s    '))#$(!		." ,) 's+++"}}DHHH 	. 	. 	."&D()D%%%%%%%	.s   0A! !
B	+BB	c                 l   | j         rd S t                      ad| _         	 t                              d           |                     d           t                      | _        n=# t          $ r0}|j         d|j	         d}|| _
        t          d|           d }~ww xY w|                                  d S )NTinitr   z ()zError at driver init: )r   r>   _loggerinfocuInitr   r   rJ   rN   rM   r   r   _initialize_extras)rQ   rx   descriptions      r=   ensure_initializedzDriver.ensure_initialized   s     	F --"	!LL   KKNNN yyDHH  	K 	K 	KU//af///K(3D%"#IK#I#IJJJ	K 	!!!!!s   /A# #
B-+BBc                    t           rd S t          j        d t                    } |t          j                  } ||                     d                     t          j        t          t          j        t          j
                  t          j        t          j                  t          j                  } |t          j                  }d|_        |                     d|          }|| _        d S )NcuIpcOpenMemHandlecall_cuIpcOpenMemHandle)USE_NV_BINDINGrh   	CFUNCTYPEr   r   set_cuIpcOpenMemHandle	_find_apir   POINTERr   cu_device_ptrcu_ipc_mem_handler   r   r)   _ctypes_wrap_fnr   )rQ   	set_protor   
call_protor   	safe_calls         r=   r   zDriver._initialize_extras  s     	F $T844	!*7+I!J!Jt~~.BCCDDD%e&,nV5I&J&J&,nV5M&N&N&,m5 5
 #-*W-L"M"M+D((()B)@B B	 #,r?   c                 <    |                                   | j        d u S rL   )r   r   rU   s    r=   is_availablezDriver.is_available  s"    !!!(D00r?   c                     |                                   | j        t          d| j        z            t          r|                     |          S |                     |          S )NzError at driver init: 
%s:)r   r   r   r   _cuda_python_wrap_fnr   rQ   fnames     r=   __getattr__zDriver.__getattr__"  so    !!!$0"#@#'#<$= > > >  	/,,U333''...r?   c                 l    a	 t                    }n# t          $ r t                    w xY w|d         }|dd          }                               |_        |_         fd} fd}t          j        r|}n|} t          j	                  |          }	t           |	           |	S )Nr   r   c                      d                     d | D                       }t                              dj        |            |  }                    |           d S )N, c                 ,    g | ]}t          |          S rE   rw   ra   args     r=   rd   zIDriver._ctypes_wrap_fn.<locals>.verbose_cuda_api_call.<locals>.<listcomp>?      999SC999r?   call driver api: %s(%s))r`   r   debugr)   _check_ctypes_error)argsargstrretcoder   libfnrQ   s      r=   verbose_cuda_api_callz5Driver._ctypes_wrap_fn.<locals>.verbose_cuda_api_call>  sc    YY99D999::FMM3U^VLLLeTlG$$UG44444r?   c                  ~    t                               dj                    |  }                    |           d S Nzcall driver api: %s)r   r   r)   r   )r   r   r   r   rQ   s     r=   safe_cuda_api_callz2Driver._ctypes_wrap_fn.<locals>.safe_cuda_api_callD  sA    MM/@@@eTlG$$UG44444r?   )r   KeyErrorAttributeErrorr   restypeargtypesr   CUDA_LOG_API_ARGS	functoolswrapssetattr)
rQ   r   r   protor   r   r   r   wrapperr   s
   ```       r=   r   zDriver._ctypes_wrap_fn/  s   =,&u- , , ,$U+++,AhGQRRyH NN5))E#EM%EN	5 	5 	5 	5 	5 	5 	5	5 	5 	5 	5 	5 	5 	5
 # 	)+GG(G*IOE**733	eY'''s    /c                      t          t                     fd} fd}t          j        r|}n|} t	          j                  |          }t           |           |S )Nc                      d                     d | D                       }t                              dj        |                                |            S )Nr   c                 ,    g | ]}t          |          S rE   rw   r   s     r=   rd   zNDriver._cuda_python_wrap_fn.<locals>.verbose_cuda_api_call.<locals>.<listcomp>V  r   r?   r   )r`   r   r   r)   _check_cuda_python_error)r   r   r   r   rQ   s     r=   r   z:Driver._cuda_python_wrap_fn.<locals>.verbose_cuda_api_callU  sX    YY99D999::FMM3U^VLLL00tEEEr?   c                  v    t                               dj                                        |            S r   )r   r   r)   r   )r   r   r   rQ   s    r=   r   z7Driver._cuda_python_wrap_fn.<locals>.safe_cuda_api_callZ  s6    MM/@@@00tEEEr?   )r.   bindingr   r   r   r   r   )rQ   r   r   r   r   r   r   s   ``    @r=   r   zDriver._cuda_python_wrap_fnR  s    ''	F 	F 	F 	F 	F 	F 	F
	F 	F 	F 	F 	F 	F 	F # 	)+GG(G*IOE**733	eY'''r?   c                     t           j        r
t          sd}nd}|D ]-}	 t          | j         |           c S # t
          $ r Y *w xY wfd}t          | |           |S )N)_v2_ptds_v2_ptsz_ptds_ptsz_v2 )r   r   c                  (    t          d           )NzDriver missing function: )r   )r   kwsr   s     r=   absent_functionz)Driver._find_api.<locals>.absent_functionx  s    !"Ee"E"EFFFr?   )r   CUDA_PER_THREAD_DEFAULT_STREAMr   r.   r   r   r   )rQ   r   variantsvariantr   s    `   r=   r   zDriver._find_apig  s     0 	# 	#LHH"H 	 	GtxE)<7)<)<=====!   
	G 	G 	G 	G 	G 	e_---s   :
AAc                     | j         Ut                      | j         k    r@d}t                              |t                      | j                    t	          d          d S d S )Nz0pid %s forked from pid %s after CUDA driver initzCUDA initialized before forking)r   r   r   criticalr   )rQ   rN   s     r=   _detect_forkzDriver._detect_fork~  sZ    8GII$9$9DCS'))TX666!"CDDD  $9$9r?   c                    |t           j        k    rqt                              |d          }d|d|}t                              |           |t           j        k    r|                                  t          ||          d S )NUNKNOWN_CUDA_ERRORCall to  results in )	r   CUDA_SUCCESS	ERROR_MAPgetr   errorCUDA_ERROR_NOT_INITIALIZEDr   rJ   )rQ   r   r   errnamerN   s        r=   r   zDriver._check_ctypes_error  s    e(((mmG-ABBGG05ww?CMM#%:::!!###w,,, )(r?   c                 J   |d         }|dd          }t          |          dk    r|d         }|t          j        j        k    r`d|d|j        }t
                              |           |t          j        j        k    r|                                  t          ||          |S )Nr   r   r   r   )
lenr   CUresultr   r   r   r   r   r   rJ   )rQ   r   returnedr   retvalrN   s         r=   r   zDriver._check_cuda_python_error  s    1+!""v;;!AYFg&333305w||DCMM#'*EEE!!###w,,,r?   r   c                     | j                             |          }|t          |          }|| j         |<   t          j        |          S rL   )r   r   Deviceweakrefproxy)rQ   devnumdevs      r=   
get_devicezDriver.get_device  sC    lv&&;..C#&DL }S!!!r?   c                     t           r|                                 S t                      }|                     t          |                     |j        S rL   )r   cuDeviceGetCountr   r   value)rQ   counts     r=   get_device_countzDriver.get_device_count  sG     	+((***eEll+++{r?   c                 N    t          | j                                                  S )z)Returns a list of active devices
        )listr   valuesrU   s    r=   list_deviceszDriver.list_devices  s      DL''))***r?   c                 f    | j                                         D ]}|                                 dS )zReset all devices
        N)r   r  reset)rQ   r  s     r=   r  zDriver.reset  s:     <&&(( 	 	CIIKKKK	 	r?   c                 X   |                                  5 }|j        tt          r%t                                          cddd           S t          j                    }t                              t          |                     |cddd           S 	 ddd           dS # 1 swxY w Y   dS )znPop the active CUDA context and return the handle.
        If no CUDA context is active, return None.
        N)get_active_contextr  r   drivercuCtxPopCurrentr   
cu_contextr   )rQ   acpoppeds      r=   pop_active_contextzDriver.pop_active_context  s    $$&& 	""y$! "!1133	" 	" 	" 	" 	" 	" 	" 	"
 $.00F**5==999!	" 	" 	" 	" 	" 	" 	" 	"$	" 	" 	" 	" 	" 	" 	" 	" 	" 	" 	" 	" 	" 	" 	" 	" 	" 	"s   'B	;BB#&B#c                     t                      S )z3Returns an instance of ``_ActiveContext``.
        )_ActiveContextrU   s    r=   r  zDriver.get_active_context  s     r?   c                     t           rt                                          }nGt          j        d          }t                              t          j        |                     |j        }|dz  }||dz  z
  dz  }||fS )zM
        Returns the CUDA Runtime version as a tuple (major, minor).
        r   i  
   )r   r  cuDriverGetVersionrh   r   r   r  )rQ   versiondvmajorminors        r=   get_versionzDriver.get_version  sy      	//11GGaB%%fl2&6&6777hG 4EDL)b0u~r?   rL   r   )r)   rC   rD   __doc__r   r   rP   r   r   propertyr   r   r   r   r   r   r   r   r	  r  r  r  r  r  r&  rE   r?   r=   r   r      s\         J  . . . " " "*, , ,* 1 1 X1/ / /! ! ! !F  *  .E E E- - -  " " " "  + + +
  " " "     
    r?   r   c                   F    e Zd ZdZ ej                    Zd Zd Zd Z	e	Z
dS )r  zAn contextmanager object to cache active context to reduce dependency
    on querying the CUDA driver API.

    Once entering the context, it is assumed that the active CUDA context is
    not changed until the context is exited.
    c                    d}t          | j        d          r| j        j        \  }}n t          r/t                                          }t          |          dk    rd }nFt          j        d          }t                              t          |                     |j
        r|nd }|d }nt          r't          t                                                    }nAt          j                    }t                              t          |                     |j
        }||f| j        _        d}|| _        || _        || _        | S )NF
ctx_devnumr   T)hasattr
_tls_cacher,  r   r  cuCtxGetCurrentr0   r   r  r   r  cuCtxGetDevice	cu_device_is_topcontext_handler  )rQ   is_tophctxr  hdevices        r=   	__enter__z_ActiveContext.__enter__  s!   4?L11 	?5LD&&  4--//t99>>D(++&&uT{{333#z3ttt|! + !6!6!8!899FF$.00G))%..999$]F.2F^*"r?   c                 B    | j         rt          | j        d           d S d S )Nr,  )r2  delattrr.  )rQ   exc_typeexc_valexc_tbs       r=   __exit__z_ActiveContext.__exit__  s.    < 	3DO\22222	3 	3r?   c                     | j         duS )zAReturns True is there's a valid and active CUDA context.
        N)r3  rU   s    r=   __bool__z_ActiveContext.__bool__  s     "$..r?   N)r)   rC   rD   r(  	threadinglocalr.  r7  r=  r?  __nonzero__rE   r?   r=   r  r    s^          !""J     D3 3 3/ / /
 KKKr?   r  c                      d} t          j                    }t          t                    D ]D}|                    |           r-t          t          |          ||t          |           d          <   E|S )NCU_DEVICE_ATTRIBUTE_)r   r   r   r   r   r.   r   )r   r   r   s      r=   _build_reverse_device_attrsrE    sg    #F



CE

 ; ;??6"" 	;&-eT&:&:CS[[\\"#Jr?   c                   z    e Zd ZdZed             Zd Zd Zd Zd Z	d Z
d Zd	 Zd
 Zd Zd Zed             ZdS )r  z
    The device object owns the CUDA contexts.  This is owned by the driver
    object.  User should not construct devices directly.
    c                    t          t                                                    D ]8}t                              |          }|                                |k    r|c S 9d                    |          }t          |          )zgCreate Device object from device identity created by
        ``Device.get_device_identity()``.
        zKNo device of {} is found. Target device may not be visible in this process.)ranger  r  r	  get_device_identityformatRuntimeError)rQ   identitydeviddr   s        r=   from_identityzDevice.from_identity$  s    
 6224455 		' 		'E!!%((A$$&&(22 3DfX  v&&&r?   c           
         t           r1t                              |          }|| _        t	          |          }nDt                      }t                              t          |          |           |j        }|| _        d| d| }||k    rt          |          i | _	        | j
        | j        f| _        d}t           rIt                              || j                  }|                    d                              d          }n9t!          |z              }t                              ||| j                   |j        }|| _        t           r4t                              | j                  }t'          |j                  }	nWt+                      }t                              t          |          | j                   t'          t)          |                    }	d}
|
dz  }|
dz  }|
d	z  }d
| d| d| d| d| 
}||	z  | _        d | _        d S )NzDriver returned device z instead of    utf-8 z%02x         zGPU--)r   r  cuDeviceGetidr0   r   r   r  rK  
attributesCOMPUTE_CAPABILITY_MAJORCOMPUTE_CAPABILITY_MINORcompute_capabilitycuDeviceGetNamedecoderstripr	   r   cuDeviceGetUuidtuplebytesr   uuidprimary_context)rQ   r  result
got_devnumrN   bufszbufr   rd  	uuid_valsbb2b4b6r&   s                  r=   rP   zDevice.__init__4  s     	!''//FDGVJJWWFuV}}f555J DGH
HHHHZs### $(#@#'#@#B  	((88C::g&&--d33DDE>$$C""3tw7779D	  	+))$'22Ddj))II99D""5;;888eDkk**IUUU-R--"--r--B----)O	#r?   c                 ,    | j         | j        | j        dS )N)pci_domain_id
pci_bus_idpci_device_id)PCI_DOMAIN_ID
PCI_BUS_IDPCI_DEVICE_IDrU   s    r=   rI  zDevice.get_device_identityh  s"    !//!/
 
 	
r?   c                 $    d| j         | j        fz  S )Nz<CUDA device %d '%s'>)rY  r   rU   s    r=   __repr__zDevice.__repr__o  s    &$'49)===r?   c                    t           r>t          t          j        d|           }t                              || j                  }no	 t          |         }n# t          $ r t          |          w xY wt                      }t                              t          |          || j                   |j        }t          | ||           |S )zRead attributes lazily
        rD  )r   r.   r   CUdevice_attributer  cuDeviceGetAttributerY  DEVICE_ATTRIBUTESr   r   r   r   r  r   )rQ   attrrM   r  rf  s        r=   r   zDevice.__getattr__r  s      	!758$88: :D//dg>>EE+(. + + +$T***+ WWF''ftTWEEELEdE"""s   A A/c                 *    t          | j                  S rL   )hashrY  rU   s    r=   __hash__zDevice.__hash__  s    DG}}r?   c                 P    t          |t                    r| j        |j        k    S dS NF)r/   r  rY  rQ   others     r=   __eq__zDevice.__eq__  s(    eV$$ 	'7eh&&ur?   c                     | |k     S rL   rE   r  s     r=   __ne__zDevice.__ne__  s    EM""r?   c                 `   | j         | j         S t          |            t          r t                              | j                  }n@t          j                    }t                              t          |          | j                   t          t          j        |           |          }|| _         |S )zo
        Returns the primary context for the device.
        Note: it is not pushed to the CPU thread.
        )re  met_requirement_for_devicer   r  cuDevicePrimaryCtxRetainrY  r   r  r   Contextr  r  )rQ   r5  ctxs      r=   get_primary_contextzDevice.get_primary_context  s    
 +''"4((( 	B2247;;DD$&&D++E$KKAAAgmD))400"
r?   c                 d    | j         r(t                              | j                   d| _         dS dS )zO
        Release reference to primary context if it has been retained.
        N)re  r  cuDevicePrimaryCtxReleaserY  rU   s    r=   release_primary_contextzDevice.release_primary_context  s=      	(,,TW555#'D   	( 	(r?   c                     	 | j         | j                                          |                                  t                              | j                   d S # t                              | j                   w xY wrL   )re  r  r  r  cuDevicePrimaryCtxResetrY  rU   s    r=   r  zDevice.reset  sq    	4#/$**,,,((*** **4733333F**473333s   4A !A8c                     | j         dk    S )N)r#   r"   )r]  rU   s    r=   supports_float16zDevice.supports_float16  s    &&00r?   N)r)   rC   rD   r(  classmethodrO  rP   rI  rw  r   r  r  r  r  r  r  r)  r  rE   r?   r=   r  r    s          ' ' ['2$ 2$ 2$h
 
 
> > >  (    
# # #  (( ( (4 4 4 1 1 X1 1 1r?   r  c                 X    | j         t          k     rt          | dt                    d S )Nz has compute capability < )r]  MIN_REQUIRED_CCr   )devices    r=   r  r    s:     ?22 & 9 : : 	: 32r?   c                       e Zd ZdZd Zed             Zed             Zed             Zed             Z	ed             Z
ed             Zed	             Zed
             Zeed                         ZdS )BaseCUDAMemoryManagerzAAbstract base class for External Memory Management (EMM) Plugins.c                 `    d|vrt          d          |                    d          | _        d S )Ncontextz!Memory manager requires a context)rK  popr  )rQ   r   kwargss      r=   rP   zBaseCUDAMemoryManager.__init__  s3    F""BCCCzz),,r?   c                     dS )z
        Allocate on-device memory in the current context.

        :param size: Size of allocation in bytes
        :type size: int
        :return: A memory pointer instance that owns the allocated memory
        :rtype: :class:`MemoryPointer`
        NrE   )rQ   sizes     r=   memalloczBaseCUDAMemoryManager.memalloc        r?   c                     dS )a  
        Allocate pinned host memory.

        :param size: Size of the allocation in bytes
        :type size: int
        :param mapped: Whether the allocated memory should be mapped into the
                       CUDA address space.
        :type mapped: bool
        :param portable: Whether the memory will be considered pinned by all
                         contexts, and not just the calling context.
        :type portable: bool
        :param wc: Whether to allocate the memory as write-combined.
        :type wc: bool
        :return: A memory pointer instance that owns the allocated memory. The
                 return type depends on whether the region was mapped into
                 device memory.
        :rtype: :class:`MappedMemory` or :class:`PinnedMemory`
        NrE   )rQ   r  mappedportablewcs        r=   memhostallocz"BaseCUDAMemoryManager.memhostalloc  r  r?   c                     dS )aZ  
        Pin a region of host memory that is already allocated.

        :param owner: The object that owns the memory.
        :param pointer: The pointer to the beginning of the region to pin.
        :type pointer: int
        :param size: The size of the region in bytes.
        :type size: int
        :param mapped: Whether the region should also be mapped into device
                       memory.
        :type mapped: bool
        :return: A memory pointer instance that refers to the allocated
                 memory.
        :rtype: :class:`MappedMemory` or :class:`PinnedMemory`
        NrE   rQ   ownerpointerr  r  s        r=   mempinzBaseCUDAMemoryManager.mempin  r  r?   c                     dS )z
        Perform any initialization required for the EMM plugin instance to be
        ready to use.

        :return: None
        NrE   rU   s    r=   
initializez BaseCUDAMemoryManager.initialize  r  r?   c                     dS )a  
        Return an IPC handle from a GPU allocation.

        :param memory: Memory for which the IPC handle should be created.
        :type memory: :class:`MemoryPointer`
        :return: IPC handle for the allocation
        :rtype: :class:`IpcHandle`
        NrE   rQ   memorys     r=   get_ipc_handlez$BaseCUDAMemoryManager.get_ipc_handle  r  r?   c                     dS )a  
        Returns ``(free, total)`` memory in bytes in the context. May raise
        :class:`NotImplementedError`, if returning such information is not
        practical (e.g. for a pool allocator).

        :return: Memory info
        :rtype: :class:`MemoryInfo`
        NrE   rU   s    r=   get_memory_infoz%BaseCUDAMemoryManager.get_memory_info  r  r?   c                     dS )zX
        Clears up all memory allocated in this context.

        :return: None
        NrE   rU   s    r=   r  zBaseCUDAMemoryManager.reset  r  r?   c                     dS )z
        Returns a context manager that ensures the implementation of deferred
        cleanup whilst it is active.

        :return: Context manager
        NrE   rU   s    r=   defer_cleanupz#BaseCUDAMemoryManager.defer_cleanup"  r  r?   c                     dS )z
        Returns an integer specifying the version of the EMM Plugin interface
        supported by the plugin implementation. Should always return 1 for
        implementations of this version of the specification.
        NrE   rU   s    r=   interface_versionz'BaseCUDAMemoryManager.interface_version+  r  r?   N)r)   rC   rD   r(  rP   r   r  r  r  r  r  r  r  r  r)  r  rE   r?   r=   r  r    s&       KK- - -
   ^   ^(   ^"   ^   ^   ^   ^   ^   ^ X  r?   r  )	metaclassc                   h     e Zd ZdZ fdZd Z	 	 d
dZddZd Zd Z	e
j        d	             Z xZS )HostOnlyCUDAMemoryManagera]  Base class for External Memory Management (EMM) Plugins that only
    implement on-device allocation. A subclass need not implement the
    ``memhostalloc`` and ``mempin`` methods.

    This class also implements ``reset`` and ``defer_cleanup`` (see
    :class:`numba.cuda.BaseCUDAMemoryManager`) for its own internal state
    management. If an EMM Plugin based on this class also implements these
    methods, then its implementations of these must also call the method from
    ``super()`` to give ``HostOnlyCUDAMemoryManager`` an opportunity to do the
    necessary work for the host allocations it is managing.

    This class does not implement ``interface_version``, as it will always be
    consistent with the version of Numba in which it is implemented. An EMM
    Plugin subclassing this class should implement ``interface_version``
    instead.
    c                      t                      j        |i | t          j                    | _        t                      | _        d S rL   )rO   rP   r   r   allocations_PendingDeallocsdeallocationsrQ   r   r  rR   s      r=   rP   z"HostOnlyCUDAMemoryManager.__init__G  sD    $)&))) +---//r?   c                     	  |            S # t           $ r^}t          rt          j        j        }nt
          j        }|j        |k    r(| j                                          |            cY d}~S  d}~ww xY w)z
        Attempt allocation by calling *allocator*.  If an out-of-memory error
        is raised, the pending deallocations are flushed and the allocation
        is retried.  If it fails in the second attempt, the error is reraised.
        N)	rJ   r   r   r   CUDA_ERROR_OUT_OF_MEMORYr   rM   r  clear)rQ   	allocatorrx   oom_codes       r=   _attempt_allocationz-HostOnlyCUDAMemoryManager._attempt_allocationL  s    	9;; 	 	 	 :"+D 9v!!"((*** y{{""""""	s!   	 
A4AA/(A4.A//A4Fc                 6  
 d
|r
t           j        z  
|r
t           j        z  
|r
t           j        z  
t          r+
fd}|r|                     |          n
 |            }n>t                      
fd}|r|                     |           n
 |             j        }t          | ||          }t          j
        | j                  }|r1t          ||          }	|	| j        |<   |	                                S t          ||          S )zImplements the allocation of pinned host memory.

        It is recommended that this method is not overridden by EMM Plugin
        implementations - instead, use the :class:`BaseCUDAMemoryManager`.
        r   c                  :    t                                          S rL   )r  cuMemHostAlloc)flagsr  s   r=   r  z9HostOnlyCUDAMemoryManager.memhostalloc.<locals>.allocators  s    ,,T5999r?   c                  Z    t                               t                                d S rL   )r  r  r   r  r  r  s   r=   r  z9HostOnlyCUDAMemoryManager.memhostalloc.<locals>.allocator  s'    %%eGnndEBBBBBr?   	finalizer)r   CU_MEMHOSTALLOC_DEVICEMAPCU_MEMHOSTALLOC_PORTABLECU_MEMHOSTALLOC_WRITECOMBINEDr   r  r   r  _hostalloc_finalizerr  r  r  MappedMemoryr  ownPinnedMemory)rQ   r  r  r  r  r  	alloc_keyr  r  memr  r  s    `        @@r=   r  z&HostOnlyCUDAMemoryManager.memhostallocc  s     	5U44E 	4U33E 	9U88E 	&: : : : : :  &229==#)++IIjjGC C C C C C C  ((3333	I(w	4PP	mDL)) 	IsGTYGGGC*-DY'7799WdiHHHHr?   c                   
 t          t                    rt          st                    t          r}nj        }d
|r
t
          j        z  

fd}|r|                     |           n
 |             t          | ||          }t          j
        | j                  }|r2t          |||          }	|	| j        |<   |	                                S t          |||          S )zImplements the pinning of host memory.

        It is recommended that this method is not overridden by EMM Plugin
        implementations - instead, use the :class:`BaseCUDAMemoryManager`.
        r   c                  @    t                                           d S rL   )r  cuMemHostRegisterr  s   r=   r  z3HostOnlyCUDAMemoryManager.mempin.<locals>.allocator  s!    $$WdE:::::r?   )r  r  )r/   r0   r   r   r  r   CU_MEMHOSTREGISTER_DEVICEMAPr  _pin_finalizerr  r  r  r  r  r  r  )rQ   r  r  r  r  r  r  r  r  r  r  s     ``      @r=   r  z HostOnlyCUDAMemoryManager.mempin  s6    gs## 	(N 	(w''G 	&III
  	8U77E	; 	; 	; 	; 	; 	; 	;  	$$Y////IKKK"4)VDD	mDL)) 	5sGT)24 4 4C*-DY'7799Wd%*35 5 5 5r?   c                 v   t           rfd}|                     |          }n6t          j                    fd}|                     |           j        }t          | |          }t          j        | j                  }t          ||          }|| j
        |<   |                                S )Nc                      t           j        } r| j        j        }n| j        j        }t
                              |          S rL   )r   CUmemAttach_flagsCU_MEM_ATTACH_GLOBALr  CU_MEM_ATTACH_HOSTr  cuMemAllocManaged)ma_flagsr  attach_globalr  s     r=   r  z<HostOnlyCUDAMemoryManager.memallocmanaged.<locals>.allocator  sA    "4  >$9?EE$7=E//e<<<r?   c                      t                      } rt          j        } nt          j        } t                              t                    |            d S rL   )r   r   r  r  r  r  r   )r  r  ptrr  s    r=   r  z<HostOnlyCUDAMemoryManager.memallocmanaged.<locals>.allocator  sJ      5!6EE!4E((sT5AAAAAr?   r  )r   r  r   r   r  _alloc_finalizerr  r  r  ManagedMemoryr  r  )	rQ   r  r  r  r  r  r  r  r  s	    ``     @r=   memallocmanagedz)HostOnlyCUDAMemoryManager.memallocmanaged  s     	"= = = = = = **955CII &((CB B B B B B B $$Y///	I$T3	4@@	mDL))Cdi@@@&)#wwyyr?   c                 j    | j                                          | j                                         dS )zClears up all host memory (mapped and/or pinned) in the current
        context.

        EMM Plugins that override this method must call ``super().reset()`` to
        ensure that host allocations are also cleaned up.N)r  r  r  rU   s    r=   r  zHostOnlyCUDAMemoryManager.reset  s4     	     """""r?   c              #   v   K   | j                                         5  dV  ddd           dS # 1 swxY w Y   dS )a@  Returns a context manager that disables cleanup of mapped or pinned
        host memory in the current context whilst it is active.

        EMM Plugins that override this method must obtain the context manager
        from this method before yielding to ensure that cleanup of host
        allocations is also deferred.N)r  disablerU   s    r=   r  z'HostOnlyCUDAMemoryManager.defer_cleanup  s       '')) 	 	EEE	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	s   .22FFFF)r)   rC   rD   r(  rP   r  r  r  r  r  
contextlibcontextmanagerr  rW   rX   s   @r=   r  r  5  s         "0 0 0 0 0
  . 9>.I .I .I .I`(5 (5 (5 (5T$ $ $L# # #       r?   r  c                       e Zd ZdZd ZdS )GetIpcHandleMixinzLA class that provides a default implementation of ``get_ipc_handle()``.
    c                    t          |          \  }}t          r?t                              |          }t	          |j                  t	          |          z
  }nJt          j                    }t                              t          |          |           |j        j	        |z
  }| j
        j                                        }t          |||j        ||          S )a[  Open an IPC memory handle by using ``cuMemGetAddressRange`` to
        determine the base pointer of the allocation. An IPC handle of type
        ``cu_ipc_mem_handle`` is constructed and initialized with
        ``cuIpcGetMemHandle``. A :class:`numba.cuda.IpcHandle` is returned,
        populated with the underlying ``ipc_mem_handle``.
        )offset)device_extentsr   r  cuIpcGetMemHandler0   handler   r   r   r  r  r  rI  	IpcHandler  )rQ   r  baseend	ipchandler  source_infos          r=   r  z GetIpcHandleMixin.get_ipc_handle  s     #6**	c 	00066I''#d))3FF022I$$U9%5%5t<<<](4/Fl)==??FK &( ( ( 	(r?   N)r)   rC   rD   r(  r  rE   r?   r=   r  r    s-         ( ( ( ( (r?   r  c                   :    e Zd ZdZd Zd Zd Zed             ZdS )NumbaCUDAMemoryManagerzInternal on-device memory management for Numba. This is implemented using
    the EMM Plugin interface, but is not part of the public API.c                 z    | j         j        t          k    r%|                                 j        | j         _        d S d S rL   )r  memory_capacity_SizeNotSetr  totalrU   s    r=   r  z!NumbaCUDAMemoryManager.initialize  s=     -<<151E1E1G1G1MD... =<r?   c                 p   t           rfd}|                     |          }n5t          j                    fd}|                     |           j        }t          | |          }t          j        | j                  }t          ||          }|| j
        |<   |                                S )Nc                  8    t                                          S rL   )r  
cuMemAlloc)r  s   r=   r  z2NumbaCUDAMemoryManager.memalloc.<locals>.allocator  s    ((...r?   c                  X    t                               t                                d S rL   )r  r  r   )r  r  s   r=   r  z2NumbaCUDAMemoryManager.memalloc.<locals>.allocator%  s%    !!%**d33333r?   r  )r   r  r   r   r  r  r  r  r  AutoFreePointerr  r  )rQ   r  r  r  r  r  r  r  s    `     @r=   r  zNumbaCUDAMemoryManager.memalloc  s     	"/ / / / / **955CII&((C4 4 4 4 4 4 $$Y///	I$T3	4@@	mDL))c3	BBB&)#wwyyr?   c                 *   t           rt                                          \  }}n_t                      }t                      }t                              t	          |          t	          |                     |j        }|j        }t          ||          S )N)freer  )r   r  cuMemGetInfor   r   r  
MemoryInfo)rQ   r	  r  s      r=   r  z&NumbaCUDAMemoryManager.get_memory_info1  sv     	  --//KD%%::DJJEdU5\\::::DKEt51111r?   c                     t           S rL   ) _SUPPORTED_EMM_INTERFACE_VERSIONrU   s    r=   r  z(NumbaCUDAMemoryManager.interface_version=  s    //r?   N)	r)   rC   rD   r(  r  r  r  r)  r  rE   r?   r=   r  r    sk        D DN N N  ,
2 
2 
2 0 0 X0 0 0r?   r  c                     t           rd S t          j        dk    r	t          a d S 	 t	          j        t          j                  } t          | j                   d S # t          $ r t          dt          j        z            w xY w)Ndefaultz$Failed to use memory manager from %s)
_memory_managerr   CUDA_MEMORY_MANAGERr  	importlibimport_moduleset_memory_manager_numba_memory_manager	ExceptionrK  )
mgr_modules    r=   _ensure_memory_managerr  G  s      !Y..07,V-GHH
:;<<<<< 7 7 7A!56 7 7 	77s   2A 'A?c                 |     | d          }|j         }|t          k    rd|t          fz  }t          |          | adS )a:  Configure Numba to use an External Memory Management (EMM) Plugin. If
    the EMM Plugin version does not match one supported by this version of
    Numba, a RuntimeError will be raised.

    :param mm_plugin: The class implementing the EMM Plugin.
    :type mm_plugin: BaseCUDAMemoryManager
    :return: None
    Nr  z9EMM Plugin interface has version %d - version %d required)r  r  rK  r  )	mm_plugindummyiverrs       r=   r  r  Y  sT     Id###E		 B	---I5673OOOr?   c                   (     e Zd ZdZ fdZd Z xZS )r  zC
    Dummy object for _PendingDeallocs when *size* is not set.
    c                 H    t                                          | d          S Nr   )rO   r   )r   r   r  rR   s      r=   r   z_SizeNotSet.__new__s  s    wwsA&&&r?   c                     dS )N?rE   rU   s    r=   rV   z_SizeNotSet.__str__v  s    sr?   )r)   rC   rD   r(  r   rV   rW   rX   s   @r=   r  r  n  sQ         ' ' ' ' '      r?   r  c                   ~    e Zd ZdZefdZed             ZefdZd Z	e
j        d             Zed             Zd Zd	S )
r  z
    Pending deallocations of a context (or device since we are using the primary
    context). The capacity defaults to being unset (_SizeNotSet) but can be
    modified later once the driver is initialized and the total memory capacity
    known.
    c                 V    t                      | _        d| _        d| _        || _        d S r!  )r   _cons_disable_count_sizer   )rQ   capacitys     r=   rP   z_PendingDeallocs.__init__  s*    WW

'r?   c                 D    t          | j        t          j        z            S rL   )r0   r   r   CUDA_DEALLOCS_RATIOrU   s    r=   _max_pending_bytesz#_PendingDeallocs._max_pending_bytes  s    4'&*DDEEEr?   c                 L   t                               d|j        |           | j                            |||f           | xj        t          |          z  c_        t          | j                  t          j	        k    s| j        | j
        k    r|                                  dS dS )a_  
        Add a pending deallocation.

        The *dtor* arg is the destructor function that takes an argument,
        *handle*.  It is used as ``dtor(handle)``.  The *size* arg is the
        byte size of the resource added.  It is an optional argument.  Some
        resources (e.g. CUModule) has an unknown memory footprint on the device.
        z add pending dealloc: %s %s bytesN)r   r   r)   r&  r{   r(  r0   r   r   CUDA_DEALLOCS_COUNTr,  r  rQ   dtorr  r  s       r=   add_itemz_PendingDeallocs.add_item  s     	7MMM
4.///

c$ii


OOf888
T444JJLLLLL 54r?   c                     | j         s`| j        rP| j                                        \  }}}t                              d|j        |            ||           | j        Pd| _        dS dS )zh
        Flush any pending deallocations unless it is disabled.
        Do nothing if disabled.
        zdealloc: %s %s bytesr   N)is_disabledr&  popleftr   r   r)   r(  r/  s       r=   r  z_PendingDeallocs.clear  s|    
  	* '+z'9'9';';$vt3T]DIIIV *  DJJJ	 	r?   c              #      K   | xj         dz  c_         	 dV  | xj         dz  c_         | j         dk    sJ dS # | xj         dz  c_         | j         dk    sJ w xY w)zs
        Context manager to temporarily disable flushing pending deallocation.
        This can be nested.
        r   Nr   r'  rU   s    r=   r  z_PendingDeallocs.disable  s       	q 	,EEE1$&!++++++ 1$&!++++++++s	   7 Ac                     | j         dk    S r!  r6  rU   s    r=   r3  z_PendingDeallocs.is_disabled  s    "Q&&r?   c                 *    t          | j                  S )z:
        Returns number of pending deallocations.
        )r   r&  rU   s    r=   __len__z_PendingDeallocs.__len__  s     4:r?   N)r)   rC   rD   r(  r  rP   r)  r,  r1  r  r  r  r  r3  r9  rE   r?   r=   r  r  }  s          !, ( ( ( ( F F XF +6     
 
 
 
, 
, 
, ' ' X'    r?   r  r  z
free,totalc                   
   e Zd ZdZd Zd Zd Z	 d(dZd Zd Z		 d(d	Z
d
 Zd Zd Zd Zd Zd Zd)dZd*dZd+dZd Zd Zd,dZd Zd Zd Zd Zd Zd Zd Zd  Zd! Zd)d"Z d# Z!e"j#        d$             Z$d% Z%d& Z&d' Z'dS )-r  zs
    This object wraps a CUDA Context resource.

    Contexts should not be constructed directly by user code.
    c                     || _         || _        t          j                    | _        t                      | _        t                       t          |           | _	        t          j                    | _
        i | _        d S )Nr  )r  r  r   r   r  r  r  r  r  memory_managermodulesextras)rQ   r  r  s      r=   rP   zContext.__init__  si     +---//   -d;;;'))r?   c                     t                               d| j        j                   | j                                         | j                                         | j                                         dS )z?
        Clean up all owned resources in this context.
        zreset context of device %sN)	r   r   r  rY  r<  r  r=  r  r  rU   s    r=   r  zContext.reset  sa    
 	14;>BBB!!###  """""r?   c                 4    | j                                         S )z>Returns (free, total) memory in bytes in the context.
        )r<  r  rU   s    r=   r  zContext.get_memory_info  s     "22444r?   Nc                 D    ||||f}t           r
 | j        | S  | j        | S )a  Return occupancy of a function.
        :param func: kernel for which occupancy is calculated
        :param blocksize: block size the kernel is intended to be launched with
        :param memsize: per-block dynamic shared memory usage intended, in bytes
        )r   -_cuda_python_active_blocks_per_multiprocessor(_ctypes_active_blocks_per_multiprocessor)rQ   func	blocksizememsizer  r   s         r=   $get_active_blocks_per_multiprocessorz,Context.get_active_blocks_per_multiprocessor  s;     i%0 	HE4EtLL@4@$GGr?   c                 |    |j         ||g}|st          j        | S |                    |           t          j        | S rL   )r  r  +cuOccupancyMaxActiveBlocksPerMultiprocessorr{   4cuOccupancyMaxActiveBlocksPerMultiprocessorWithFlags)rQ   rD  rE  rF  r  pss         r=   rB  z5Context._cuda_python_active_blocks_per_multiprocessor  sG    k9g. 	KErJJ
		%JBOOr?   c                     t                      }t          |          |j        ||f}|st          j        |  nt          j        |  |j        S rL   )r   r   r  r  rI  rJ  r  )rQ   rD  rE  rF  r  r  r   s          r=   rC  z0Context._ctypes_active_blocks_per_multiprocessor  sQ    ft{Iw? 	O>EEEGNN|r?   c                 F    |||||f}t           r
 | j        | S  | j        | S )aj  Suggest a launch configuration with reasonable occupancy.
        :param func: kernel for which occupancy is calculated
        :param b2d_func: function that calculates how much per-block dynamic
                         shared memory 'func' uses based on the block size.
                         Can also be the address of a C function.
                         Use `0` to pass `NULL` to the underlying CUDA API.
        :param memsize: per-block dynamic shared memory usage intended, in bytes
        :param blocksizelimit: maximum block size the kernel is designed to
                               handle
        )r   %_cuda_python_max_potential_block_size _ctypes_max_potential_block_size)rQ   rD  b2d_funcrF  blocksizelimitr  r   s          r=   get_max_potential_block_sizez$Context.get_max_potential_block_size  s=     h? 	@=4=tDD848$??r?   c                 *   t                      }t                      }t          |          }t          |          t          |          |j        |||g}	|st	          j        |	  n#|	                    |           t	          j        |	  |j        |j        fS rL   )	r   r   r   r  r   cuOccupancyMaxPotentialBlockSizer{   )cuOccupancyMaxPotentialBlockSizeWithFlagsr  )
rQ   rD  rP  rF  rQ  r  gridsizerE  b2d_cbr   s
             r=   rO  z(Context._ctypes_max_potential_block_size&  s    77GG	&x00hy!1!14;)  	D3T:::KK<dCC	00r?   c                 .    t          j        t          t                    |          }t                              |d          }t          j        |          }|j        |||g}	|st          j
        |	 S |	                    |           t          j        |	 S )Nlittle	byteorder)rh   r   r   r   r0   
from_bytesr   CUoccupancyB2DSizer  r  rT  r{   rU  )
rQ   rD  rP  rF  rQ  r  rW  r  driver_b2d_cbr   s
             r=   rN  z-Context._cuda_python_max_potential_block_size6  s    2!(E228<<nnVxn882377]G^D 	K:DAAKKCTJJr?   c                 8    | j                                          dS )zWInitialize the context for use.
        It's safe to be called multiple times.
        N)r<  r  rU   s    r=   prepare_for_usezContext.prepare_for_useC  s     	&&(((((r?   c                 l    t                               | j                   |                                  dS )z@
        Pushes this context on the current CPU Thread.
        N)r  cuCtxPushCurrentr  r`  rU   s    r=   pushzContext.pushI  s2     	,,,r?   c                     t                                           }t          r)t          |          t          | j                  k    sJ dS |j        | j        j        k    sJ dS )z
        Pops this context off the current CPU thread. Note that this context
        must be at the top of the context stack, otherwise an error will occur.
        N)r  r  r   r0   r  r  )rQ   r  s     r=   r  zContext.popP  sb    
 **,, 	5v;;#dk"2"2222222<4;#4444444r?   c                 6    | j                             |          S rL   )r<  r  )rQ   bytesizes     r=   r  zContext.memalloc[  s    "++H555r?   Tc                 8    | j                             ||          S rL   )r<  r  )rQ   rf  r  s      r=   r  zContext.memallocmanaged^  s    "228]KKKr?   Fc                 <    | j                             ||||          S rL   )r<  r  )rQ   rf  r  r  r  s        r=   r  zContext.memhostalloca  s    "//&(BOOOr?   c                     |r#| j         j        st          d| j         z            | j                            ||||          S )Nz%s cannot map host memory)r  CAN_MAP_HOST_MEMORYr   r<  r  r  s        r=   r  zContext.mempind  sK     	M$+9 	M!"="KLLL"))%$GGGr?   c                 b    t           st          d          | j                            |          S )z?
        Returns an *IpcHandle* from a GPU allocation.
        zOS does not support CUDA IPC)SUPPORTS_IPCrz   r<  r  r  s     r=   r  zContext.get_ipc_handlei  s2      	:8999"11&999r?   c                    d}t           rt                              ||          }n<t          j                    }t                              t          |          ||           t          t          j        |           ||          S )Nr   )r  r  r  )	r   r  r   r   r   r   MemoryPointerr  r  )rQ   r  r  r  dptrs        r=   open_ipc_handlezContext.open_ipc_handleq  s}     	B,,VU;;DD'))D%%eDkk65AAA W]4%8%8$"&( ( ( 	(r?   r   c                 \    |dk    s
J d            t                               ||           dS )zLEnable peer access between the current context and the peer context
        r   z$*flags* is reserved and MUST be zeroN)r  cuCtxEnablePeerAccess)rQ   peer_contextr  s      r=   enable_peer_accesszContext.enable_peer_access~  s4     zzzAzzz$$\599999r?   c                 $   t           r:t          j        |          }t                              | j        j        |          }nAt                      }t                              t          |          | j        j        |           t          |          S )zsReturns a bool indicating whether the peer access between the
        current and peer device is possible.
        )
r   r   CUdevicer  cuDeviceCanAccessPeerr  rY  r   r   bool)rQ   peer_devicecan_access_peers      r=   rz  zContext.can_access_peer  s      	G!*;77K$::4;>;FH HOO $ggO(()?)?)-G G G O$$$r?   c                     t          |t                    r|                    d          }t          r|}nt	          |          }|                     |          S Nutf8)r/   r+   encoder   r
   create_module_image)rQ   ptximages      r=   create_module_ptxzContext.create_module_ptx  sS    c3 	%**V$$C 	"EESMME''...r?   c                     t          | |          }t          r|j        }n|j        j        }|| j        |<   t          j        |          S rL   )load_module_imager   r  r  r=  r  r  )rQ   r  modulekeys       r=   r  zContext.create_module_image  sH    "4// 	&-CC-%C"S}V$$$r?   c                 L    t           r|j        }n|j        j        }| j        |= d S rL   )r   r  r  r=  )rQ   r  r  s      r=   unload_modulezContext.unload_module  s-     	&-CC-%CLr?   c                     t           rt          j        t                    }nt	          j        t          j                  }t          t          j        |           |d           S rL   )	r   r   CUstreamCU_STREAM_DEFAULTr   	cu_streamStreamr  r  rQ   r  s     r=   get_default_streamzContext.get_default_stream  sM     	@%&788FF%f&>??FgmD))64888r?   c                     t           rt          j        t          j                  }nt	          j        t          j                  }t          t          j        |           |d           S rL   )	r   r   r  CU_STREAM_LEGACYr   r  r  r  r  r  s     r=   get_legacy_default_streamz!Context.get_legacy_default_stream  sN     	?%g&>??FF%f&=>>FgmD))64888r?   c                     t           rt          j        t          j                  }nt	          j        t          j                  }t          t          j        |           |d           S rL   )	r   r   r  CU_STREAM_PER_THREADr   r  r  r  r  r  s     r=   get_per_thread_default_streamz%Context.get_per_thread_default_stream  sO     	C%g&BCCFF%f&ABBFgmD))64888r?   c                 T   t           r1t          j        j        j        }t
                              |          }n;t          j                    }t
                              t          |          d           t          t          j        |           |t          | j        |                    S r!  )r   r   CUstream_flagsr  r  r  cuStreamCreater   r  r   r  r  r  _stream_finalizerr  )rQ   r  r  s      r=   create_streamzContext.create_stream  s     		4
 *<BE**511FF%''F!!%--333gmD))6'(:FCCE E 	Er?   c                     t          |t                    st          d          t          rt	          j        |          }nt          j        |          }t          t          j
        |           |d d          S )Nz&ptr for external stream must be an intT)external)r/   r0   	TypeErrorr   r   r  r   r  r  r  r  )rQ   r  r  s      r=   create_external_streamzContext.create_external_stream  sx    #s## 	FDEEE 	+%c**FF%c**FgmD))64#% % % 	%r?   c                 P   d}|s|t           j        z  }t          rt                              |          }n;t          j                    }t                              t          |          |           t          t          j
        |           |t          | j        |                    S Nr   r  )r   CU_EVENT_DISABLE_TIMINGr   r  cuEventCreater   cu_eventr   Eventr  r  _event_finalizerr  )rQ   timingr  r  s       r=   create_eventzContext.create_event  s     	3U22E 	7))%00FF_&&F  v666W]4((&/0BFKKM M M 	Mr?   c                 8    t                                            d S rL   )r  cuCtxSynchronizerU   s    r=   synchronizezContext.synchronize  s    !!!!!r?   c              #      K   | j                                         5  | j                                        5  d V  d d d            n# 1 swxY w Y   d d d            d S # 1 swxY w Y   d S rL   )r<  r  r  r  rU   s    r=   r  zContext.defer_cleanup  s       ..00 	 	#++--                	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	s3   AAAA	AA	AA#&A#c                 .    d| j         | j        j        fz  S )Nz<CUDA context %s of device %d>)r  r  rY  rU   s    r=   rw  zContext.__repr__  s    /4;2OOOr?   c                 Z    t          |t                    r| j        |j        k    S t          S rL   )r/   r  r  NotImplementedr  s     r=   r  zContext.__eq__  s)    eW%% 	";%,..!!r?   c                 .    |                      |           S rL   )r  r  s     r=   r  zContext.__ne__  s    ;;u%%%%r?   rL   )Tr  r  r'  )(r)   rC   rD   r(  rP   r  r  rG  rB  rC  rR  rO  rN  r`  rc  r  r  r  r  r  r  rp  rt  rz  r  r  r  r  r  r  r  r  r  r  r  r  r  rw  r  r  rE   r?   r=   r  r    s~        	 	 		# 	# 	#5 5 5 48H H H HP P P
 
 
 <@@ @ @ @$1 1 1 K K K) ) )  	5 	5 	56 6 6L L L LP P P PH H H H
: : :( ( (: : : :% % %/ / /% % %  9 9 99 9 99 9 9E E E% % %
M 
M 
M 
M" " "   
P P P" " "& & & & &r?   r  c                 P    t           rt          | |          S t          | |          S )!
    image must be a pointer
    )r   load_module_image_cuda_pythonload_module_image_ctypes)r  r  s     r=   r  r    s,      8,We<<<'777r?   c                    t           j        }t          |z              }t          |z              }t          j        t          |          t          j        t          |          t          j        t          |          t          j	        t          |          t          j
        t          t           j                  i}t          j        t          |          z  |                                 }t          t          |          z  |                                 }t          j                    }	 t$                              t)          |          |t          |          ||           nD# t*          $ r7}	d|j                            d          z  }
t+          |	j        |
          d }	~	ww xY w|j        }t3          t5          j        |           ||t9          | |                    S )NcuModuleLoadDataEx error:
%sr}  )r   CUDA_LOG_SIZEr	   r   CU_JIT_INFO_LOG_BUFFERr   !CU_JIT_INFO_LOG_BUFFER_SIZE_BYTESr   CU_JIT_ERROR_LOG_BUFFER"CU_JIT_ERROR_LOG_BUFFER_SIZE_BYTESCU_JIT_LOG_VERBOSECUDA_VERBOSE_JIT_LOGr   cu_jit_optionr   keysr  	cu_moduler  cuModuleLoadDataExr   rJ   r  r_  rM   CtypesModuler  r  _module_finalizer)r  r  logszjitinfo	jiterrorsoptionsoption_keysoption_valsr  rx   rN   info_logs               r=   r  r    s    E~  G%""I 	$i&8&8/%%y';';0(5// (6+F"G"GG '#g,,6HKc'll*W^^-=-=>KF(!!%--G"-{	< 	< 	< 	< ( ( (-	0F0Fv0N0NN163'''( }Hg..)'6::< < <s   '8E   
F!*2FF!c           
         t           j        }t          |          }t          |          }t          j        }|j        ||j        ||j        ||j        ||j	        t           j
        i}d |                                D             }d |                                D             }	 t                              |t          |          ||          }	nA# t           $ r4}
|                    d          }d|z  }t!          |
j        |          d}
~
ww xY w|                    d          }t'          t)          j        |           |	|t-          | |	                    S )r  c                     g | ]}|S rE   rE   )ra   ks     r=   rd   z1load_module_image_cuda_python.<locals>.<listcomp>6  s    ---1---r?   c                     g | ]}|S rE   rE   )ra   vs     r=   rd   z1load_module_image_cuda_python.<locals>.<listcomp>7  s    ///1///r?   rR  r  N)r   r  	bytearrayr   CUjit_optionr  r  r  r  r  r  r  r  r  r  r   rJ   r_  rM   CudaPythonModuler  r  r  )r  r  r  r  r  
jit_optionr  r  r  r  rx   
err_stringrN   r  s                 r=   r  r  $  sX     EG%  I%J)74e*I5u%v'BG .-gllnn---K//gnn..///K(**5#g,,+68 8 ( ( (%%g..
-
:163'''(
 ~~g&&HGM'22FH-gv>>@ @ @s   #*C 
D/DDc                 >    | j         | j        fd}|S )Nc                  T    r =                      t          j                   d S rL   )r1  r  	cuMemFree)r  r  r  r  r  s   r=   corez_alloc_finalizer.<locals>.coreK  s4     	'I&v/d;;;;;r?   )r  r  )r<  r  r  r  r  r  r  s    ``` @@r=   r  r  G  sK     ,K"0M< < < < < < < < <
 Kr?   c                 T    | j         | j        st          fd}|S )a[  
    Finalize page-locked host memory allocated by `context.memhostalloc`.

    This memory is managed by CUDA, and finalization entails deallocation. The
    issues noted in `_pin_finalizer` are not relevant in this case, and the
    finalization is placed in the `context.deallocations` queue along with
    finalization of device objects.

    c                  X    rr =                      t          j                   d S rL   )r1  r  cuMemFreeHost)r  r  r  r  r  r  s   r=   r  z"_hostalloc_finalizer.<locals>.coreb  s;     	'k 	'I&v3S$?????r?   )r  r  r  )r<  r  r  r  r  r  r  r  s    ```` @@r=   r  r  S  sj     !,K"0M @ @ @ @ @ @ @ @ @ @
 Kr?   c                 ,    | j         fd}|S )aB  
    Finalize temporary page-locking of host memory by `context.mempin`.

    This applies to memory not otherwise managed by CUDA. Page-locking can
    be requested multiple times on the same memory, and must therefore be
    lifted as soon as finalization is requested, otherwise subsequent calls to
    `mempin` may fail with `CUDA_ERROR_HOST_MEMORY_ALREADY_REGISTERED`, leading
    to unexpected behavior for the context managers `cuda.{pinned,mapped}`.
    This function therefore carries out finalization immediately, bypassing the
    `context.deallocations` queue.

    c                  J    rr = t                                          d S rL   )r  cuMemHostUnregister)r  r  r  r  s   r=   r  z_pin_finalizer.<locals>.corey  s4     	'k 	'I&""3'''''r?   )r  )r<  r  r  r  r  r  s    ``` @r=   r  r  j  s?     !,K( ( ( ( ( ( ( (
 Kr?   c                       fd}|S )Nc                  H                          t          j                   d S rL   )r1  r  cuEventDestroydeallocsr  s   r=   r  z_event_finalizer.<locals>.core  s"    &/88888r?   rE   r  r  r  s   `` r=   r  r    s)    9 9 9 9 9 9 Kr?   c                       fd}|S )Nc                  H                          t          j                   d S rL   )r1  r  cuStreamDestroyr  s   r=   r  z_stream_finalizer.<locals>.core  s"    &0&99999r?   rE   r  s   `` r=   r  r    s)    : : : : : : Kr?   c                 \    | j         | j        t          rnj        fd}|S )Nc                  \    t           j        fd}                     |            d S )Nc                 \                 svsJ t                               |            d S rL   )r  cuModuleUnload)r  r  r=  shutting_downs    r=   module_unloadz6_module_finalizer.<locals>.core.<locals>.module_unload  s;     !=??8c&8&8&8&8!!&)))))r?   )r   r  r1  )r  r  deallocr  r  r=  s    @r=   r  z_module_finalizer.<locals>.core  sM    +	* 	* 	* 	* 	* 	* 	* 	/////r?   )r  r=  r   r  )r  r  r  r  r  r=  s    ` @@@r=   r  r    sZ    #GoG l
0 
0 
0 
0 
0 
0 
0 
0 Kr?   c                   $    e Zd ZdZd Zd Zd ZdS )_CudaIpcImplzjImplementation of GPU IPC using CUDA driver API.
    This requires the devices to be peer accessible.
    c                 t    |j         | _         |j        | _        |j        | _        |j        | _        d | _        d S rL   )r  r  r  r  _opened_mem)rQ   parents     r=   rP   z_CudaIpcImpl.__init__  s4    K	mK	mr?   c                    | j         t          d          | j        t          d          |                    | j        | j        | j        z             }|| _        |                                                    | j                  S )T
        Import the IPC memory and returns a raw CUDA memory pointer object
        Nz'opening IpcHandle from original processIpcHandle is already opened)	r  rl   r  rp  r  r  r  r  view)rQ   r  r  s      r=   openz_CudaIpcImpl.open  sy     9 FGGG':;;;%%dk4;3JKK wwyy~~dk***r?   c                     | j         t          d          t                              | j         j                   d | _         d S NzIpcHandle not opened)r  rl   r  cuIpcCloseMemHandler  rU   s    r=   closez_CudaIpcImpl.close  sB    #3444""4#3#:;;;r?   Nr)   rC   rD   r(  rP   r  r  rE   r?   r=   r  r    sK              + + +"         r?   r  c                   $    e Zd ZdZd Zd Zd ZdS )_StagedIpcImplzImplementation of GPU IPC using custom staging logic to workaround
    CUDA IPC limitation on peer accessibility between devices.
    c                 j    || _         |j        | _        |j        | _        |j        | _        || _        d S rL   )r  r  r  r  r  )rQ   r  r  s      r=   rP   z_StagedIpcImpl.__init__  s2    K	mK	&r?   c                 4   ddl m} t                              | j                  }t
          rt          |j                  }n|j        }t          | j	                  }|j
        |         5  |                    |j                                                  }d d d            n# 1 swxY w Y   |                    | j                  }t!          ||| j                   |j
        |         5  |                                 d d d            n# 1 swxY w Y   |S )Nr   r    )r  )numbar!   r  rO  r  r   r0   rY  r  r  gpusr  r   get_contextr  r  device_to_devicer  )rQ   r  r!   srcdev	srcdev_idimpl
source_ptrnewmems           r=   r  z_StagedIpcImpl.open  s   %%d&677 	"FIII	I4;///Yy! 	? 	?4<#;#;#=#=>>J	? 	? 	? 	? 	? 	? 	? 	? 	? 	? 	? 	? 	? 	? 	? !!$),, 	TY777 Yy! 	 	JJLLL	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 s$   +-B$$B(+B(,DDDc                     d S rL   rE   rU   s    r=   r  z_StagedIpcImpl.close  s    r?   Nr  rE   r?   r=   r  r    sK         ' ' '  4    r?   r  c                   b    e Zd ZdZddZd Zd Zd Zd Zd	 Z	dd
Z
d Zd Zed             ZdS )r  ao  
    CUDA IPC handle. Serialization of the CUDA IPC handle object is implemented
    here.

    :param base: A reference to the original allocation to keep it alive
    :type base: MemoryPointer
    :param handle: The CUDA IPC handle, as a ctypes array of bytes.
    :param size: Size of the original allocation
    :type size: int
    :param source_info: The identity of the device on which the IPC handle was
                        opened.
    :type source_info: dict
    :param offset: The offset into the underlying allocation of the memory
                   referred to by this IPC handle.
    :type offset: int
    Nr   c                 Z    || _         || _        || _        || _        d | _        || _        d S rL   )r  r  r  r  _implr  )rQ   r  r  r  r  r  s         r=   rP   zIpcHandle.__init__  s1    		&
r?   c                 2    | j         t          d          d S )Nz#IPC handle doesn't have source info)r  rK  rU   s    r=   _sentry_source_infozIpcHandle._sentry_source_info  s"    #DEEE $#r?   c                     |                                   | j        |j                                        k    rdS t                              | j                  }|                    |j                  S )zdReturns a bool indicating whether the active context can peer
        access the IPC handle
        T)r  r  r  rI  r  rO  rz  rY  )rQ   r  source_devices      r=   rz  zIpcHandle.can_access_peer  sd     	  """w~AACCCC4,,T-=>>&&}'7888r?   c                     |                                   | j        t          d          t          | | j                  | _        | j                            |          S )zCOpen the IPC by allowing staging on the host memory first.
        Nr  )r  r  rl   r  r  r  rQ   r  s     r=   open_stagedzIpcHandle.open_staged  sU     	  """:!:;;;#D$*:;;
zw'''r?   c                     | j         t          d          t          |           | _         | j                             |          S )r  Nr  )r  rl   r  r  r  s     r=   open_directzIpcHandle.open_direct'  s>     :!:;;;!$''
zw'''r?   c                 n    | j         |                     |          r| j        }n| j        } ||          S )a  Open the IPC handle and import the memory for usage in the given
        context.  Returns a raw CUDA memory pointer object.

        This is enhanced over CUDA IPC that it will work regardless of whether
        the source device is peer-accessible by the destination device.
        If the devices are peer-accessible, it uses .open_direct().
        If the devices are not peer-accessible, it uses .open_staged().
        )r  rz  r  r  )rQ   r  fns      r=   r  zIpcHandle.open1  s@     #t';';G'D'D#!BB!Br'{{r?   c                 |    ddl m} ||j        }|                     |          }|                    ||||          S )zC
        Similar to `.open()` but returns an device array.
        r   )devicearrayN)shapestridesdtypegpu_data)r   r  itemsizer  DeviceNDArray)rQ   r  r  r  r  r  ro  s          r=   
open_arrayzIpcHandle.open_array@  s^     	"!!!!! ?nGyy!!((ug/4t ) E E 	Er?   c                 r    | j         t          d          | j                                          d | _         d S r  )r  rl   r  rU   s    r=   r  zIpcHandle.closeN  s8    :3444



r?   c                     t           r| j        j        }nt          | j                  }| j        || j        | j        | j        f}t          j	        |fS rL   )
r   r  reservedrb  rR   r  r  r  r   _rebuild_reduction)rQ   preprocessed_handler   s      r=   
__reduce__zIpcHandle.__reduce__T  sW     	5"&+"6"'"4"4NIK
 ,d33r?   c                     t           rt          j                    }||_        nt	          j        | } | d ||||          S )N)r  r  r  r  r  )r   r   CUipcMemHandler$  r   r   )r   
handle_aryr  r  r  r  s         r=   _rebuildzIpcHandle._rebuildc  sR     	;+--F(FOO-z:FsV$*6; ; ; 	;r?   r!  rL   )r)   rC   rD   r(  rP   r  rz  r  r  r  r!  r  r'  r  r+  rE   r?   r=   r  r    s             F F F9 9 9	( 	( 	(( ( (  E E E E  4 4 4 ; ; [; ; ;r?   r  c                   |    e Zd ZdZdZddZed             Zd Zd Z	dd	Z
dd
Zed             Zed             ZdS )rn  a  A memory pointer that owns a buffer, with an optional finalizer. Memory
    pointers provide reference counting, and instances are initialized with a
    reference count of 1.

    The base ``MemoryPointer`` class does not use the
    reference count for managing the buffer lifetime. Instead, the buffer
    lifetime is tied to the memory pointer instance's lifetime:

    - When the instance is deleted, the finalizer will be called.
    - When the reference count drops to 0, no action is taken.

    Subclasses of ``MemoryPointer`` may modify these semantics, for example to
    tie the buffer lifetime to the reference count, so that the buffer is freed
    when there are no more references.

    :param context: The context in which the pointer was allocated.
    :type context: Context
    :param pointer: The address of the buffer.
    :type pointer: ctypes.c_void_p
    :param size: The size of the allocation in bytes.
    :type size: int
    :param owner: The owner is sometimes set by the internals of this class, or
                  used for Numba's internal memory management. It should not be
                  provided by an external user of the ``MemoryPointer`` class
                  (e.g. from within an EMM Plugin); the default of `None`
                  should always suffice.
    :type owner: NoneType
    :param finalizer: A function that is called when the buffer is to be freed.
    :type finalizer: function
    TNc                     || _         || _        || _        || _        |d u| _        d| _        | j        | _        || _        |t          j	        | |          | _
        d S d S Nr   )r  device_pointerr  _cuda_memsize_
is_managedrefctr  _ownerr  finalize
_finalizerrQ   r  r  r  r  r  s         r=   rP   zMemoryPointer.__init__  sj    %	"#4/
) %.tY??DOOO ! r?   c                 "    | j         | n| j         S rL   )r3  rU   s    r=   r  zMemoryPointer.owner  s    {*tt;r?   c                 D    t          t          j        |                     S rL   )OwnedPointerr  r  rU   s    r=   r  zMemoryPointer.own  s    GM$//000r?   c                     | j         r=| j        j        st          d          |                                  | j        j        rJ dS dS )z8
        Forces the device memory to the trash.
        zFreeing dead memoryN)r1  r5  aliverK  rU   s    r=   r	  zMemoryPointer.free  s]     ? 	-?( :"#8999OO,,,,		- 	- -,r?   r   c                     || j         n|}|r)t                              | j        |||j                   d S t                              | j        ||           d S rL   )r  r  cuMemsetD8Asyncr/  r  
cuMemsetD8)rQ   byter  streams       r=   memsetzMemoryPointer.memset  sm    "]		 	@""4#6e#)=2 2 2 2 2 d14?????r?   c                 >   || j         |z
  }n||z
  }| j        s|dk    rt          d          | }n| j        |z   }|dk     rt          d          t          rLt	          j                    }t          j                            |	                                          }||_
        nt          j        |          }t          | j        ||| j                  }t          | j        t          t          f          r't          t!          j        | j                  |          S |S )Nr   z non-empty slice into empty slicezsize cannot be negative)r  )r  device_pointer_valuerK  r   r   CUdeviceptrr   r   from_addressgetPtrr  rn  r  r  r/   r9  r  r  )rQ   startstopr  r  r  r  
ctypes_ptrs           r=   r  zMemoryPointer.view  s   <9u$DD%<D ( 	Pqyy"#EFFFDD ,u4Daxx"#<=== 5!-//#1>>w~~?O?OPP
#'
   .t44 wDJOOODdj=,"?@@ 	dj 9 94@@@ Kr?   c                     | j         S rL   )r/  rU   s    r=   device_ctypes_pointerz#MemoryPointer.device_ctypes_pointer  s    ""r?   c                 T    t           rt          | j                  pd S | j        j        S rL   )r   r0   r/  r  rU   s    r=   rC  z"MemoryPointer.device_pointer_value  s,     	-t*++3t3&,,r?   NNr!  rL   )r)   rC   rD   r(  __cuda_memory__rP   r)  r  r  r	  rA  r  rK  rC  rE   r?   r=   rn  rn  n  s         < O@ @ @ @ < < X<1 1 1- - -@ @ @ @   > # # X# - - X- - -r?   rn  c                   "     e Zd ZdZ fdZ xZS )r  a  Modifies the ownership semantic of the MemoryPointer so that the
    instance lifetime is directly tied to the number of references.

    When the reference count reaches zero, the finalizer is invoked.

    Constructor arguments are the same as for :class:`MemoryPointer`.
    c                 h     t          t          |           j        |i | | xj        dz  c_        d S r.  )rO   r  rP   r2  r  s      r=   rP   zAutoFreePointer.__init__  s:    -ot$$-t>v>>> 	

a



r?   )r)   rC   rD   r(  rP   rW   rX   s   @r=   r  r    sB                 r?   r  c                   .     e Zd ZdZdZd fd	Zd Z xZS )r  a=  A memory pointer that refers to a buffer on the host that is mapped into
    device memory.

    :param context: The context in which the pointer was mapped.
    :type context: Context
    :param pointer: The address of the buffer.
    :type pointer: ctypes.c_void_p
    :param size: The size of the buffer in bytes.
    :type size: int
    :param owner: The owner is sometimes set by the internals of this class, or
                  used for Numba's internal memory management. It should not be
                  provided by an external user of the ``MappedMemory`` class
                  (e.g. from within an EMM Plugin); the default of `None`
                  should always suffice.
    :type owner: NoneType
    :param finalizer: A function that is called when the buffer is to be freed.
    :type finalizer: function
    TNc                    || _         || _        t          r(t                              |d          }| j        | _        nMt          j                    }t                              t          |          |d           | j        j	        | _        || _
        t          t          |                               ||||           | j        | _        | j        | _        d S r  )ownedhost_pointerr   r  cuMemHostGetDevicePointer_bufptr_r   r   r   r  r/  rO   r  rP   r  r  _buflen_rQ   r  r  r  r  r  devptrrR   s          r=   rP   zMappedMemory.__init__  s    
# 	455gqAAF -DMM)++F,,U6]]GQGGG -3DM$lD!!**7FD5> 	+ 	@ 	@ 	@' 	r?   c                 D    t          t          j        |                     S rL   )MappedOwnedPointerr  r  rU   s    r=   r  zMappedMemory.own  s    !'-"5"5666r?   rM  r)   rC   rD   r(  rN  rP   r  rW   rX   s   @r=   r  r    s]         & O" " " " " "(7 7 7 7 7 7 7r?   r  c                        e Zd ZdZddZd ZdS )r  a*  A pointer to a pinned buffer on the host.

    :param context: The context in which the pointer was mapped.
    :type context: Context
    :param owner: The object owning the memory. For EMM plugin implementation,
                  this ca
    :param pointer: The address of the buffer.
    :type pointer: ctypes.c_void_p
    :param size: The size of the buffer in bytes.
    :type size: int
    :param owner: An object owning the buffer that has been pinned. For EMM
                  plugin implementation, the default of ``None`` suffices for
                  memory allocated in ``memhostalloc`` - for ``mempin``, it
                  should be the owner passed in to the ``mempin`` method.
    :param finalizer: A function that is called when the buffer is to be freed.
    :type finalizer: function
    Nc                     || _         || _        || _        || _        |d u| _        | j        | _        | j        | _        t          r| j        | _        n| j        j	        | _        |t          j        | |           d S d S rL   )r  rS  r  rT  r1  r  rW  r   rV  r  r  r4  r6  s         r=   rP   zPinnedMemory.__init__0  s    
	##4/' 	 	4 -DMM -3DM T9----- ! r?   c                     | S rL   rE   rU   s    r=   r  zPinnedMemory.ownB  s    r?   rM  )r)   rC   rD   r(  rP   r  rE   r?   r=   r  r    sA         $. . . .$    r?   r  c                   .     e Zd ZdZdZd fd	Zd Z xZS )r  aI  A memory pointer that refers to a managed memory buffer (can be accessed
    on both host and device).

    :param context: The context in which the pointer was mapped.
    :type context: Context
    :param pointer: The address of the buffer.
    :type pointer: ctypes.c_void_p
    :param size: The size of the buffer in bytes.
    :type size: int
    :param owner: The owner is sometimes set by the internals of this class, or
                  used for Numba's internal memory management. It should not be
                  provided by an external user of the ``ManagedMemory`` class
                  (e.g. from within an EMM Plugin); the default of `None`
                  should always suffice.
    :type owner: NoneType
    :param finalizer: A function that is called when the buffer is to be freed.
    :type finalizer: function
    TNc                     || _         |}t                                          ||||           | j        | _        t
          r| j        | _        d S | j        j        | _        d S )Nr  )	rS  rO   rP   r  rW  r   r/  rV  r  rX  s          r=   rP   zManagedMemory.__init__\  sb    
&$)DDD 	 	6 /DMMM /5DMMMr?   c                 D    t          t          j        |                     S rL   )ManagedOwnedPointerr  r  rU   s    r=   r  zManagedMemory.ownh  s    "7=#6#6777r?   rM  r\  rX   s   @r=   r  r  F  s]         & O
6 
6 
6 
6 
6 
68 8 8 8 8 8 8r?   r  c                       e Zd ZddZd ZdS )r9  Nc                     || _         || j         | _        n|j        rJ || _        | j         fd}| j         xj        dz  c_        t	          j        | |           d S )Nc                      	  xj         dz  c_          j         dk    sJ  j         dk    r                                  d S d S # t          $ r Y d S w xY w)Nr   r   )r2  r	  ReferenceError)r  s   r=   derefz$OwnedPointer.__init__.<locals>.derefx  sk    		Q		yA~~~~9>>HHJJJJJ ">!   s   <A 
AAr   )_mem_viewr1  r2  r  r4  )rQ   memptrr  rh  r  s       @r=   rP   zOwnedPointer.__init__m  s|    	<DJJ&&&DJi	 	 	 	 	 		1u%%%%%r?   c                 ,    t          | j        |          S )z$Proxy MemoryPointer methods
        )r.   rj  r   s     r=   r   zOwnedPointer.__getattr__  s     tz5)))r?   rL   )r)   rC   rD   rP   r   rE   r?   r=   r9  r9  l  s7        & & & &0* * * * *r?   r9  c                       e Zd ZdS )r[  NrB   rE   r?   r=   r[  r[    rF   r?   r[  c                       e Zd ZdS )rc  NrB   rE   r?   r=   rc  rc    rF   r?   rc  c                       e Zd ZddZd Zd Zd Zej        d             Z	ddZ
eed	                         Zd
ej        j        fdZdS )r  Fc                 b    || _         || _        || _        |t          j        | |           d S d S rL   )r  r  r  r  r4  )rQ   r  r  r  r  s        r=   rP   zStream.__init__  s>      T9----- ! r?   c                 h    t           rt          | j                  S | j        j        pt          j        S rL   )r   r0   r  r  r   r  rU   s    r=   __int__zStream.__int__  s0     	At{### ;$@(@@r?   c                 b   t           r8t          dt          j        dt          j        di}t          | j                  pd}n>t          j        dt          j        dt          j        di}| j        j        pt          j        }||v r||         | j	        z  S | j
        rd|| j	        fz  S d|| j	        fz  S )Nz<Default CUDA stream on %s>z"<Legacy default CUDA stream on %s>z&<Per-thread default CUDA stream on %s>r   z<External CUDA stream %d on %s>z<CUDA stream %d on %s>)r   r  r   r  r  r0   r  r   r  r  r  )rQ   default_streamsr  s      r=   rw  zStream.__repr__  s     	@!#@(8,<O dk""'aCC (*G')M+<	O +#?v'?C/!!"3'$,66] 	B4T\7JJJ+sDL.AAAr?   c                 D    t                               | j                   dS )zy
        Wait for all commands in this stream to execute. This will commit any
        pending memory transfers.
        N)r  cuStreamSynchronizer  rU   s    r=   r  zStream.synchronize  s     
 	""4;/////r?   c              #   :   K   | V  |                                   dS )z
        A context manager that waits for all commands in this stream to execute
        and commits any pending memory transfers upon exiting the context.
        N)r  rU   s    r=   auto_synchronizezStream.auto_synchronize  s)       


r?   Nc                    | ||f}t          |           t          rEt                              | j        d          }t          j        |          }t          |          }n| j        }t          	                    | j
        ||d           dS )ai  
        Add a callback to a compute stream.
        The user provided function is called from a driver thread once all
        preceding stream operations are complete.

        Callback functions are called from a CUDA driver thread, not from
        the thread that invoked `add_callback`. No CUDA API functions may
        be called from within the callback function.

        The duration of a callback function should be kept short, as the
        callback will block later work in the stream and may block other
        callbacks from being executed.

        Note: The driver function underlying this method is marked for
        eventual deprecation and may be replaced in a future CUDA release.

        :param callback: Callback function with arguments (stream, status, arg).
        :param arg: Optional user data to be passed to the callback function.
        rY  rZ  r   N)
_py_increfr   r0   r\  _stream_callbackr   CUstreamCallbackrY  r  cuStreamAddCallbackr  )rQ   callbackr   datar  stream_callbacks         r=   add_callbackzStream.add_callback  s    ( h$4 	4..!6(.KKC%6s;;Od88DD"3O""4;qIIIIIr?   c                     	 |\  }}} ||||           n.# t           $ r!}t          j        d|            Y d }~nd }~ww xY wt          |           d S # t          |           w xY w)NzException in stream callback: )r  warningswarn
_py_decref)r  statusr  r@  r~  r   rx   s          r=   r{  zStream._stream_callback  s    	$(!FHcHVVS)))) 	@ 	@ 	@M>1>>????????	@ tJts(    A 
A<A AA A&returnc                      t          j                                                    } fdfd}                     ||           |S )z
        Return an awaitable that resolves once all preceding stream operations
        are complete. The result of the awaitable is the current stream.
        c                     |                                  rd S |dk    r|                                d S |                     t          d|                      d S )Nr   zStream error )done
set_resultset_exceptionr  )futurer  rQ   s     r=   resolverz#Stream.async_done.<locals>.resolver  sg    {{}} J1!!$'''''$$Y/Gv/G/G%H%HIIIIIr?   c                 6                         ||           d S rL   )call_soon_threadsafe)r@  r  r  loopr  s      r=   r~  z#Stream.async_done.<locals>.callback	  s!    %%h?????r?   )asyncioget_running_loopcreate_futurer  )rQ   r  r~  r  r  s   `  @@r=   
async_donezStream.async_done  s    
 '))##%%	J 	J 	J 	J 	J	@ 	@ 	@ 	@ 	@ 	@ 	(F+++r?   r  rL   )r)   rC   rD   rP   rr  rw  r  r  r  rx  r  staticmethodr   r{  r  futuresFuturer  rE   r?   r=   r  r    s        . . . .A A AB B B40 0 0   J J J J>    \GO2      r?   r  c                   8    e Zd Zd	dZd Zd
dZd Zd
dZd ZdS )r  Nc                 T    || _         || _        |t          j        | |           d S d S rL   )r  r  r  r4  )rQ   r  r  r  s       r=   rP   zEvent.__init__	  s7     T9----- ! r?   c                     	 t                               | j                   dS # t          $ r!}|j        t
          j        k    rY d}~dS  d}~ww xY w)zy
        Returns True if all work before the most recent record has completed;
        otherwise, returns False.
        TNF)r  cuEventQueryr  rJ   rM   r   CUDA_ERROR_NOT_READY)rQ   rx   s     r=   queryzEvent.query	  se    
	,,, 4  	 	 	v333uuuuu		s   # 
AA	A		Ar   c                     t           r|r|j        nt          j        d          }n|r|j        nd}t                              | j        |           dS )a  
        Set the record point of the event to the current point in the given
        stream.

        The event will be considered to have occurred when all work that was
        queued in the stream at the time of the call to ``record()`` has been
        completed.
        r   N)r   r  r   r  r  cuEventRecord)rQ   r@  hstreams      r=   recordzEvent.record#	  sW      	5'-Ffmm73CA3F3FGG'-4fmm1GT['22222r?   c                 D    t                               | j                   dS )zN
        Synchronize the host thread for the completion of the event.
        N)r  cuEventSynchronizer  rU   s    r=   r  zEvent.synchronize2	  s      	!!$+.....r?   c                     t           r|r|j        nt          j        d          }n|r|j        nd}d}t                              || j        |           dS )zZ
        All future works submitted to stream will wait util the event completes.
        r   N)r   r  r   r  r  cuStreamWaitEvent)rQ   r@  r  r  s       r=   waitz
Event.wait8	  s^      	5'-Ffmm73CA3F3FGG'-4fmm1G  $+u=====r?   c                 "    t          | |          S rL   )event_elapsed_time)rQ   evtends     r=   elapsed_timezEvent.elapsed_timeC	  s    !$///r?   rL   r'  )	r)   rC   rD   rP   r  r  r  r  r  rE   r?   r=   r  r  	  s}        . . . .  3 3 3 3/ / /	> 	> 	> 	>0 0 0 0 0r?   r  c                     t           r%t                              | j        |j                  S t	                      }t                              t          |          | j        |j                   |j        S )zF
    Compute the elapsed time between two events in milliseconds.
    )r   r  cuEventElapsedTimer  r   r   r  )evtstartr  msecs      r=   r  r  G	  sY      ((&-HHHyy!!%++xNNNzr?   c                   L    e Zd ZdZddZd Zed             Zed             ZdS )ModulezAbstract base class for modulesNc                 l    || _         || _        || _        |t          j        | |          | _        d S d S rL   )r  r  r  r  r4  r5  )rQ   r  r  r  r  s        r=   rP   zModule.__init__V	  s=      %.tY??DOOO ! r?   c                 :    | j                             |            dS )z#Unload this module from the contextN)r  r  rU   s    r=   unloadzModule.unload]	  s    ""4(((((r?   c                     dS )z:Returns a Function object encapsulating the named functionNrE   rQ   r   s     r=   get_functionzModule.get_functiona	  r  r?   c                     dS )z4Return a MemoryPointer referring to the named symbolNrE   r  s     r=   get_global_symbolzModule.get_global_symbole	  r  r?   rL   )	r)   rC   rD   r(  rP   r  r   r  r  rE   r?   r=   r  r  S	  s        ))@ @ @ @) ) ) I I ^I C C ^C C Cr?   r  c                       e Zd Zd Zd ZdS )r  c                     t          j                    }t                              t	          |          | j        |                    d                     t          t          j	        |           ||          S r|  )
r   cu_functionr  cuModuleGetFunctionr   r  r~  CtypesFunctionr  r  rQ   r   r  s      r=   r  zCtypesModule.get_functionl	  s_    #%%""5==$+#';;v#6#6	8 	8 	8gmD1164@@@r?   c                 &   t          j                    }t          j                    }t                              t          |          t          |          | j        |                    d                     t          | j	        ||          |j
        fS r|  )r   r   r   r  cuModuleGetGlobalr   r  r~  rn  r  r  rQ   r   r  r  s       r=   r  zCtypesModule.get_global_symbolr	  ss    "$$    sU4[[$+!%V!4!4	6 	6 	6T\355tzAAr?   Nr)   rC   rD   r  r  rE   r?   r=   r  r  j	  s:        A A AB B B B Br?   r  c                       e Zd Zd Zd ZdS )r  c                     t                               | j        |                    d                    }t	          t          j        |           ||          S r|  )r  r  r  r~  CudaPythonFunctionr  r  r  s      r=   r  zCudaPythonModule.get_function|	  sA    ++DKV9L9LMM!'-"5"5vtDDDr?   c                     t                               | j        |                    d                    \  }}t	          | j        ||          |fS r|  )r  r  r  r~  rn  r  r  s       r=   r  z"CudaPythonModule.get_global_symbol	  sB    ,,T[$++f:M:MNN	TT\355t;;r?   Nr  rE   r?   r=   r  r  z	  s5        E E E< < < < <r?   r  FuncAttr)regssharedrA  const
maxthreadsc                       e Zd ZdZdZdZdZd Zd Ze	d             Z
e	 	 dd            Zed             Zed	             Zd
S )Function)r   r   r   r   c                 b    || _         || _        || _        |                                 | _        d S rL   )r  r  r   read_func_attr_allattrs)rQ   r  r  r   s       r=   rP   zFunction.__init__	  s.    	,,..


r?   c                     d| j         z  S )Nz<CUDA function %s>)r   rU   s    r=   rw  zFunction.__repr__	  s    #di//r?   c                 $    | j         j        j        S rL   )r  r  r  rU   s    r=   r  zFunction.device	  s    {"))r?   Fc                     dS )z.Set the cache configuration for this function.NrE   )rQ   prefer_equalprefer_cacheprefer_shareds       r=   cache_configzFunction.cache_config	  r  r?   c                     dS )z0Return the value of the attribute with given ID.NrE   rQ   attrids     r=   read_func_attrzFunction.read_func_attr	  r  r?   c                     dS )zPReturn a FuncAttr object with the values of various function
        attributes.NrE   rU   s    r=   r  zFunction.read_func_attr_all	  r  r?   Nr  )r)   rC   rD   griddimblockdimr@  	sharedmemrP   rw  r)  r  r   r  r  r  rE   r?   r=   r  r  	  s        GHFI/ / /0 0 0 * * X* <A#(= = = ^= ? ? ^?   ^  r?   r  c                   &    e Zd Z	 	 ddZd Zd ZdS )r  Fc                     |p|o|}|rt           j        }n*|rt           j        }n|rt           j        }nt           j        }t
                              | j        |           d S rL   )r   CU_FUNC_CACHE_PREFER_EQUALCU_FUNC_CACHE_PREFER_L1CU_FUNC_CACHE_PREFER_SHAREDCU_FUNC_CACHE_PREFER_NONEr  cuFuncSetCacheConfigr  )rQ   r  r  r  flags        r=   r  zCtypesFunction.cache_config	  sm    #G(F 	33DD 	30DD 	34DD2D##DK66666r?   c                     t                      }t                              t          |          || j                   |j        S rL   )r   r  cuFuncGetAttributer   r  r  )rQ   r  r  s      r=   r  zCtypesFunction.read_func_attr	  s3    !!%--EEE|r?   c                 `   |                      t          j                  }|                      t          j                  }|                      t          j                  }|                      t          j                  }|                      t          j                  }t          |||||          S N)r  r  rA  r  r  )r  r   CU_FUNC_ATTRIBUTE_NUM_REGS"CU_FUNC_ATTRIBUTE_CONST_SIZE_BYTES"CU_FUNC_ATTRIBUTE_LOCAL_SIZE_BYTES#CU_FUNC_ATTRIBUTE_SHARED_SIZE_BYTES'CU_FUNC_ATTRIBUTE_MAX_THREADS_PER_BLOCKr  )rQ   nregscmemlmemsmemmaxtpbs         r=   r  z!CtypesFunction.read_func_attr_all	  s    ##E$DEE""5#KLL""5#KLL""5#LMM$$9; ;U$d4#)+ + + 	+r?   Nr  r)   rC   rD   r  r  r  rE   r?   r=   r  r  	  sL        <A#(7 7 7 7  
+ + + + +r?   r  c                   &    e Zd Z	 	 ddZd Zd ZdS )r  Fc                     |p|o|}t           j        }|r|j        }n|r|j        }n|r|j        }n|j        }t                              | j        |           d S rL   )	r   CUfunction_attributer  r  r  r  r  r  r  )rQ   r  r  r  r|  r  s         r=   r  zCudaPythonFunction.cache_config	  su    #G(F+ 	22DD 	2/DD 	23DD1D##DK66666r?   c                 B    t                               || j                  S rL   )r  r  r  r  s     r=   r  z!CudaPythonFunction.read_func_attr	  s    ((===r?   c                 F   t           j        }|                     |j                  }|                     |j                  }|                     |j                  }|                     |j                  }|                     |j                  }t          |||||          S r  )	r   r  r  r  r  r  r  r  r  )rQ   r|  r  r  r  r  r  s          r=   r  z%CudaPythonFunction.read_func_attr_all	  s    +##D$CDD""4#JKK""4#JKK""4#KLL$$8: :U$d4#)+ + + 	+r?   Nr  r  rE   r?   r=   r  r  	  sL        <A#(7 7 7 7> > >	+ 	+ 	+ 	+ 	+r?   r  Fc                     d |	D             }t          t          |          z  | }t          rt          |          }d}n|}d }|
r%t                              | |||||||||
  
         d S t                              | ||||||||||           d S )Nc                 ,    g | ]}t          |          S rE   )r   r   s     r=   rd   z!launch_kernel.<locals>.<listcomp>	  s    111S)C..111r?   r   )r   r   r   r   r  cuLaunchCooperativeKernelcuLaunchKernel)cufunc_handlegxgygzbxbybzr  r  r   cooperative
param_ptrsparamsparams_for_launchextras                  r=   launch_kernelr  	  s     21D111JZ(:6F %f--" %(()+R)+R)2)0):	< 	< 	< 	< 	< 	m "b "b'%/#	% 	% 	% 	% 	%r?   )or  ar   cubinfatbinc                       e Zd ZdZedd            Zed             Zeed                         Z	eed                         Z
ed	             Zd
 Zed             Zd Zd Zed             ZdS )LinkerzAbstract base class for linkersr   FNc                     t           j        rt          |||          S t          rt	          |||          S t          |||          S rL   )r   'CUDA_ENABLE_MINOR_VERSION_COMPATIBILITY	MVCLinkerr   CudaPythonLinkerCtypesLinker)r   max_registerslineinfoccs       r=   newz
Linker.new$
  sK    9 	=]Hb999 	=#M8R@@@x<<<r?   c                     d| _         d S r  )lto)rQ   r  r  r  s       r=   rP   zLinker.__init__-
  s     r?   c                     dS )z.Return the info log from the linker invocationNrE   rU   s    r=   r  zLinker.info_log3
  r  r?   c                     dS )z/Return the error log from the linker invocationNrE   rU   s    r=   	error_logzLinker.error_log8
  r  r?   c                     dS )z&Add PTX source in a string to the linkNrE   )rQ   r  r   s      r=   add_ptxzLinker.add_ptx=
  r  r?   c                 &   t                                           5 }t                               |j                  }|j        }ddd           n# 1 swxY w Y   t          j        |||          \  }}t          j        rDt          d|z  
                    dd                     t          |           t          d           t          j                            |          d         dz   }|                     |                                |           dS )zkAdd CUDA source in a string to the link. The name of the source
        file should be specified in `name`.NzASSEMBLY %sP   rW  zP================================================================================r   z.ptx)r  r  r	  r  r]  r   compiler   DUMP_ASSEMBLYprintcenterr^   r_   splitextr#  r~  )	rQ   cur   r  r  r  r  logptx_names	            r=   add_cuzLinker.add_cuA
  s%    &&(( 	(B##BI..C'B	( 	( 	( 	( 	( 	( 	( 	( 	( 	( 	( 	( 	( 	( 	( =T2..S 	=4'//C88999#JJJ(OOO 7##D))!,v5SZZ\\8,,,,,s   'AAAc                     dS )z Add code from a file to the linkNrE   )rQ   r_   kinds      r=   add_filezLinker.add_fileS
  r  r?   c                     t          |d          5 }|                                }d d d            n# 1 swxY w Y   |                     |t          j                            |                     d S )Nrb)r  readr.  r^   r_   basename)rQ   r_   fr+  s       r=   add_cu_filezLinker.add_cu_fileW
  s    $ 	B	 	 	 	 	 	 	 	 	 	 	 	 	 	 	B((../////s   266c                 N   t           j                            |          d         dd         }|dk    rt          d          |dk    r|                     |           dS t
                              |d          }|t          d|           |                     ||           dS )z=Add a file to the link, guessing its type from its extension.r   Nr   z-Don't know how to link file with no extensionr+  z,Don't know how to link file with extension .)r^   r_   r*  rK  r7  FILE_EXTENSION_MAPr   r1  )rQ   r_   extr0  s       r=   add_file_guess_extzLinker.add_file_guess_ext\
  s    gt$$Q'+"99NOOOD[[T"""""%))#t44D|" $-'*$- $- . . .MM$%%%%%r?   c                     dS )zComplete the link. Returns (cubin, size)

        cubin is a pointer to a internal buffer of cubin owned by the linker;
        thus, it should be loaded before the linker is destroyed.
        NrE   rU   s    r=   completezLinker.completej
  r  r?   r   FN)r)   rC   rD   r(  r  r  r   rP   r)  r  r!  r#  r.  r1  r7  r;  r=  rE   r?   r=   r  r  !
  s       ))= = = [=   ^
 = = ^ X= > > ^ X> 5 5 ^5- - -$ / / ^/0 0 0
& & &   ^  r?   r  zYMinor version compatibility requires ptxcompiler and cubinlinker packages to be availablec                   d     e Zd ZdZd fd	Zed             Zed             ZddZd	 Z	d
 Z
 xZS )r  z_
    Linker supporting Minor Version Compatibility, backed by the cubinlinker
    package.
    NFc                    	 ddl m} n'# t          $ r}t          t                    |d }~ww xY w|t	          d          t                                          |||           d|d         dz  |d         z    }d|dg}|rd	| }|                    |           |r|                    d
           t          |          | _	         |d|           | _
        d S )Nr   )CubinLinkerzEMVCLinker requires Compute Capability to be specified, but cc is Nonesm_r   r   z
--gpu-namez-cz--maxrregcount=z--generate-line-infoz--arch=)cubinlinkerrA  ImportError_MVC_ERROR_MESSAGErK  rO   rP   r{   rb  ptx_compile_options_linker)
rQ   r  r  r  rA  r  archptx_compile_optsr   rR   s
            r=   rP   zMVCLinker.__init__~
  s)   	;/////// 	; 	; 	;011s:	; :  ; < < < 	"555)RURZ"Q%'))($5 	)3M33C##C((( 	<##$:;;;#()9#:#: "{#3T#3#344s   
 
.).c                     | j         j        S rL   )rG  r  rU   s    r=   r  zMVCLinker.info_log
  s    |$$r?   c                     | j         j        S rL   )rG  r!  rU   s    r=   r!  zMVCLinker.error_log
  s    |%%r?   <cudapy-ptx>c                 "   	 ddl m} ddlm} n'# t          $ r}t	          t
                    |d }~ww xY w ||                                | j                  }	 | j        	                    |j
        |           d S # |$ r}t          |d }~ww xY w)Nr   )compile_ptxCubinLinkerError)ptxcompilerrN  rC  rP  rD  rE  r_  rF  rG  	add_cubincompiled_programrH   )rQ   r  r   rN  rP  r  compile_resultrx   s           r=   r#  zMVCLinker.add_ptx
  s    	;//////4444444 	; 	; 	;011s:	;$SZZ\\43KLL	%L"">#BDIIIII 	% 	% 	%1$	%s(    
3.3 A< <BB		Bc                    	 ddl m} n'# t          $ r}t          t                    |d }~ww xY w	 t	          |d          5 }|                                }d d d            n# 1 swxY w Y   n # t          $ r t          | d          w xY wt          j	        |          j
        }|t          d         k    r| j        j        }nz|t          d         k    r| j        j        }n\|t          d         k    rt          d|           |t          d	         k    r|                     ||          S t          d|           	  |||           d S # |$ r}	t          |	d }	~	ww xY w)
Nr   rO  r3  
 not foundr  r  r  zDon't know how to link r  )rC  rP  rD  rE  r  r4  FileNotFoundErrorrH   pathlibPathr   r9  rG  rR  
add_fatbinr#  )
rQ   r_   r0  rP  r  r6  r  r   r  rx   s
             r=   r1  zMVCLinker.add_file
  s   	;4444444 	; 	; 	;011s:	;	3dD!!  Qvvxx                               	3 	3 	3111222	3 |D!!&%g...'BB'111(BB',,,>>>???'...<<d+++>>>???	%BtTNNNNN 	% 	% 	%1$	%sW   	 
-(-A. A"A. "A&&A. )A&*A. .B E E EE c                     	 ddl m} n'# t          $ r}t          t                    |d }~ww xY w	 | j                                        S # |$ r}t          |d }~ww xY w)Nr   rO  )rC  rP  rD  rE  rG  r=  rH   )rQ   rP  r  rx   s       r=   r=  zMVCLinker.complete
  s    	;4444444 	; 	; 	;011s:	;	%<((*** 	% 	% 	%1$	%s'   	 
-(-A
 
AAA)NFNrL  r)   rC   rD   r(  rP   r)  r  r!  r#  r1  r=  rW   rX   s   @r=   r  r  y
  s         5 5 5 5 5 5. % % X% & & X&
% 
% 
% 
%% % %:	% 	% 	% 	% 	% 	% 	%r?   r  c                   d     e Zd ZdZd fd	Zed             Zed             Zdd	Zd
 Z	d Z
 xZS )r  1
    Links for current device if no CC given
    r   FNc                    t                                          |||           t          j        }t	          |z              }t	          |z              }t
          j        t          |          t
          j        t          |          t
          j
        t          |          t
          j        t          |          t
          j        t          d          i}|rt          |          |t
          j        <   |rt          d          |t
          j        <   |d|t
          j        <   n0|d         dz  |d         z   }t          |          |t
          j        <   t#          |                                          }	t#          |                                          }
t)          j        t-          |	          z  |	 }t          t-          |
          z  |
 }t)          j                    x| _        }t2                              t-          |	          ||t7          | j                             t9          j        | t2          j        |           || _        || _         ||||g| _!        d S )Nr   r   r   )"rO   rP   r   r  r	   r   r  r   r  r   r  r  r  CU_JIT_MAX_REGISTERSCU_JIT_GENERATE_LINE_INFOCU_JIT_TARGET_FROM_CUCONTEXTCU_JIT_TARGETr  r  r  r   r  r   cu_link_stater  r  cuLinkCreater   r  r4  cuLinkDestroylinker_info_buflinker_errors_buf_keep_alive)rQ   r  r  r  r  
linkerinfolinkererrorsr  cc_valraw_keys
raw_valuesr  r  r  rR   s                 r=   rP   zCtypesLinker.__init__
  s   "555$un''
)) ()J*?*?3Xe__)9\+B+B4huoo$hqkk
  	J2:=2I2IGE./ 	C7?{{GE34::;GE677URZ"Q%'F+3F+;+;GE'('''..**++
+c(mm;hG#j//1J?%3555fCMM;!$+..	0 	0 	0 	v3V<<<)!-&k;Or?   c                 @    | j         j                            d          S r|  )rh  r  r_  rU   s    r=   r  zCtypesLinker.info_log  s    #)00888r?   c                 @    | j         j                            d          S r|  )ri  r  r_  rU   s    r=   r!  zCtypesLinker.error_log  s    %+226:::r?   rL  c           
      ^   t          |          }t          |                    d                    }| xj        ||gz  c_        	 t                              | j        t          j        |t          |          |dd d            d S # t          $ r}t          |d| j                  d }~ww xY wNr}  r   ru   )r
   r~  rj  r  cuLinkAddDatar  r   CU_JIT_INPUT_PTXr   rJ   rH   r!  )rQ   r  r   ptxbufnamebufrx   s         r=   r#  zCtypesLinker.add_ptx	  s    #4;;v..//VW--	>  e.D!'S7AtTK K K K K 	> 	> 	>!!!T^^<===	>s   =B 
B,B''B,c                 X   t          |                    d                    }| j                            |           	 t                              | j        ||dd d            d S # t          $ r;}|j        t          j
        k    r| d}n|d| j        }t          |          d }~ww xY wNr}  r   rV  ru   )r
   r~  rj  r{   r  cuLinkAddFiler  rJ   rM   r   CUDA_ERROR_FILE_NOT_FOUNDr!  rH   rQ   r_   r0  pathbufrx   rN   s         r=   r1  zCtypesLinker.add_file  s    4;;v..//(((	#  dGQdKKKKK 	# 	# 	#v888)))"#!!T^^4c"""	#s   $A$ $
B).6B$$B)c                    t          d          }t          d          }	 t                              | j        t          |          t          |                     n+# t          $ r}t          |d| j                  d }~ww xY w|j	        }|dk    s
J d            | j
        d d = t          j        |t          j        t          j                            }t          t           j                            ||f                    S )Nr   ru   "linker returned a zero sized cubinr  )r   r   r  cuLinkCompleter  r   rJ   rH   r!  r  rj  rh   castr   r	   rc  np	ctypeslibas_arrayrQ   	cubin_bufr  rx   	cubin_ptrs        r=   r=  zCtypesLinker.complete   s    QKK	{{	>!!$+uY/?/?tMMMM 	> 	> 	>!!!T^^<===	> zaxxx=xxxQQQ K	6>&-+H+HII	R\**9TG*DDEEEs   ;A 
B&A??Br>  r\  r]  rX   s   @r=   r  r  
  s         )P )P )P )P )P )PV 9 9 X9 ; ; X;> > > ># # #F F F F F F Fr?   r  c                   d     e Zd ZdZd fd	Zed             Zed             Zdd	Zd
 Z	d Z
 xZS )r  r_  r   FNc           
          t                                          |||           t          j        }t	          |          }t	          |          }t
          j        }|j        ||j        ||j	        ||j
        ||j        di}|r
|||j        <   |r
d||j        <   |d||j        <   n;|d         dz  |d         z   }	t          t
          j        d|	           }
|
||j        <   t%          |                                          }t%          |                                          }t*                              t/          |          ||          | _        t3          j        | t*          j        | j                   || _        || _        ||||g| _        d S )Nr   r   r   CU_TARGET_COMPUTE_)rO   rP   r   r  r  r   r  r  r  r  r  r  ra  rb  rc  r.   CUjit_targetrd  r  r  r  r  rf  r   r  r  r4  rg  rh  ri  rj  )rQ   r  r  r  r  rk  rl  r  r  rm  cc_enumrn  ro  rR   s                r=   rP   zCudaPythonLinker.__init__6  s   "555$u%%
 '')
 -z8%.95)1
  	E7DGJ34 	><=GJ89:?@GJ;<<URZ"Q%'Fg2;6;;= =G07GJ,-'''..**++
))#h--:NNv3T[AAA)!-&h
Kr?   c                 6    | j                             d          S r|  )rh  r_  rU   s    r=   r  zCudaPythonLinker.info_log`  s    #**6222r?   c                 6    | j                             d          S r|  )ri  r_  rU   s    r=   r!  zCudaPythonLinker.error_logd  s    %,,V444r?   rL  c           
      4   |                     d          }| xj        ||gz  c_        	 t          j        j        }t
                              | j        ||t          |          |dg g            d S # t          $ r}t          |d| j                  d }~ww xY wrs  )r~  rj  r   CUjitInputTyperu  r  rt  r  r   rJ   rH   r!  )rQ   r  r   rw  	input_ptxrx   s         r=   r#  zCudaPythonLinker.add_ptxh  s    ++f%%S'N*	>.?I  ic#hh!(!R5 5 5 5 5 	> 	> 	>!!!T^^<===	>s   AA/ /
B9BBc                 H   |                     d          }| j                            |           	 t                              | j        ||dg g            d S # t          $ r@}|j        t          j	        j
        k    r| d}n|d| j        }t          |          d }~ww xY wry  )r~  rj  r{   r  rz  r  rJ   rM   r   r   r{  r!  rH   r|  s         r=   r1  zCudaPythonLinker.add_filer  s    ++f%%(((	#  dGQBGGGGG 	# 	# 	#v)CCC)))"#!!T^^4c"""	#s   $A 
B!!;BB!c                    	 t                               | j                  \  }}n+# t          $ r}t	          |d| j                  d }~ww xY w|dk    s
J d            | j        d d = t          j        |t          j	        t          j
                            }t          t          j                            ||f                    S )Nru   r   r  r  )r  r  r  rJ   rH   r!  rj  rh   r  r   r	   rc  r  r  r  r  s        r=   r=  zCudaPythonLinker.complete  s    	>$33DK@@OItt 	> 	> 	>!!!T^^<===	> axxx=xxxQQQK	6>&-+H+HII	R\**9TG*DDEEEs   "% 
AAAr>  r\  r]  rX   s   @r=   r  r  2  s         (L (L (L (L (L (LT 3 3 X3 5 5 X5> > > ># # #
F 
F 
F 
F 
F 
F 
Fr?   r  c                    | dk    rt           rBt          j        }|j        }t          j        |           }t
                              ||          S t          j                    }t          j        }t
                              t          |          ||            |S t           rt          j                    S t          j                    S )zZQuery the device pointer usable in the current context from an arbitrary
    pointer.
    r   )r   r   CUpointer_attribute#CU_POINTER_ATTRIBUTE_DEVICE_POINTERrD  r  cuPointerGetAttributer   r   r   r   )r  	ptr_attrsr|  ptrobjrY  s        r=   get_devptr_for_active_ctxr    s     axx 		3I@D(--F//f===)++F<D((vcBBBM 	*&(((')))r?   c                    t          |           }t          rCt                              |          \  }}|t	          j        t          |          |z             fS t          j                    }t                      }t                              t          |          t          |          |           |j        |j        }}|||z   fS )a  Find the extents (half open begin and end pointer) of the underlying
    device memory allocation.

    NOTE: it always returns the extents of the allocation but the extents
    of the device memory view that can be a subsection of the entire allocation.
    )rK  r   r  cuMemGetAddressRanger   rD  r0   r   r   r   r   r  )devmemrY  sns       r=   r  r    s     #6**F **6221'%c!ffqj1111 ""JJ##E!HHeAhh???w1!a%xr?   c                     t          | dd          }|Et          |           \  }}t          r t          |          t          |          z
  }n||z
  }|| _        |dk    sJ d                    |                      |S )zCheck the memory size of the device memory.
    The result is cached in the device memory object.
    It may query the driver for the memory size of the device memory allocation.
    r0  Nr   z{} length array)r.   r  r   r0   r0  rJ  )r  szr  rx   s       r=   device_memory_sizer    s    
 
)4	0	0B	zf%%1 	Q#a&&BBQB "777%,,R00777Ir?   c                 >    t          | dd          }|duo|j        dv S )z?Returns True if the obj.dtype is datetime64 or timedelta64
    r  NMm)r.   char)r   r  s     r=   _is_datetime_dtyper    s,     C$''E3t!33r?   c                 b    t          |           r|                     t          j                  } | S )z^Workaround for numpy#4983: buffer protocol doesn't support
    datetime64 or timedelta64.
    )r  r  r  int64r   s    r=   _workaround_for_datetimer    s,     # !hhrx  Jr?   c                     t          | t                    r| S d}|s)t          | t          j                  pt	          |           }t          |           } t          j        | ||          S )a  Get host pointer from an obj.

    If `readonly` is False, the buffer must be writable.

    NOTE: The underlying data pointer from the host data buffer is used and
    it should not be changed until the operation which can be asynchronous
    completes.
    F)r/   r0   r  voidr  r  r   memoryview_get_buffer)r   readonlyforcewritables      r=   rT  rT    sj     #s 
M L"300K4Fs4K4K
"3
'
'C)#}hGGGr?   c                 H    t          |           } t          j        |           S )zHReturns (start, end) the start and end pointer of the array (half open).)r  r   memoryview_get_extentsr  s    r=   host_memory_extentsr    s     
"3
'
'C*3///r?   c                     t          |           t          |          k    s
J d            t          |           }t          j        | |||          \  }}||z
  S )z_Get the byte size of a contiguous memory buffer given the shape, strides
    and itemsize.
    z# dim mismatch)r   r   memoryview_get_extents_info)r  r  r  ndimr  rx   s         r=   memory_size_from_infor    sX     u::W%%%'7%%%u::D/whOODAqq5Lr?   c                 P    t          |           \  }}||k    s
J d            ||z
  S )zGet the size of the memoryzmemory extend of negative size)r  )r   r  rx   s      r=   host_memory_sizer    s0    s##DAq6663666q5Lr?   c                 F    t           r| j        S t          |           j        S )z$Get the device pointer as an integer)r   rK  r  r  s    r=   r/  r/    s$     0(($S))//r?   c                 P    | t          d          S t          |            | j        S )z,Get the ctypes object for the device pointerNr   )r   require_device_memoryrK  r  s    r=   rK  rK    s*    
{{{#$$r?   c                 $    t          | dd          S )a_  All CUDA memory object is recognized as an instance with the attribute
    "__cuda_memory__" defined and its value evaluated to True.

    All CUDA memory object should also define an attribute named
    "device_pointer" which value is an int object carrying the pointer
    value of the device memory address.  This is not tested in this method.
    rN  F)r.   r  s    r=   is_device_memoryr    s     3)5111r?   c                 B    t          |           st          d          dS )z9A sentry for methods that accept CUDA memory object.
    zNot a CUDA memory object.N)r  r  r  s    r=   r  r    s-     C   534445 5r?   c                 R    t          | dg           }|                    |           dS )zAdd dependencies to the device memory.

    Mainly used for creating structures that points to other device memory,
    so that the referees are not GC and released.
    	_depends_N)r.   extend)r  objsdepsets      r=   device_memory_dependsr  &  s-     V["--F
MM$r?   c                     g }|r>t          |t                    sJ t          j        }|                    |j                   nt          j        } |t          |           t          |d          |g|R   dS )
    NOTE: The underlying data pointer from the host data buffer is used and
    it should not be changed until the operation which can be asynchronous
    completes.
    T)r  N)	r/   r  r  cuMemcpyHtoDAsyncr{   r  cuMemcpyHtoDr/  rT  dstsrcr  r@  varargsr  s         r=   host_to_devicer  0  s     G !&&)))))%v}%%%% B~cLt<<<dMWMMMMMMr?   c                     g }|r>t          |t                    sJ t          j        }|                    |j                   nt          j        } |t          |           t          |          |g|R   dS r  N)	r/   r  r  cuMemcpyDtoHAsyncr{   r  cuMemcpyDtoHrT  r/  r  s         r=   device_to_hostr  B  s     G !&&)))))%v}%%%% B|C.--t>g>>>>>>r?   c                     g }|r>t          |t                    sJ t          j        }|                    |j                   nt          j        } |t          |           t          |          |g|R   dS r  )r/   r  r  cuMemcpyDtoDAsyncr{   r  cuMemcpyDtoDr/  r  s         r=   r  r  T  s     G !&&)))))%v}%%%% B~cN3//@@@@@@@r?   c                     g }|r>t          |t                    sJ t          j        }|                    |j                   nt          j        } |t          |           ||g|R   dS )zMemset on the device.
    If stream is not zero, asynchronous mode is used.

    dst: device memory
    val: byte value to be written
    size: number of byte to be written
    stream: a CUDA stream
    N)r/   r  r  r=  r{   r  r>  r/  )r  valr  r@  r  r  s         r=   device_memsetr  f  sx     G &&)))))#v}%%%%B~cC0000000r?   c                  8    t                                            dS )z;
    Enable profile collection in the current context.
    N)r  cuProfilerStartrE   r?   r=   profile_startr  {  s     r?   c                  8    t                                            dS )z<
    Disable profile collection in the current context.
    N)r  cuProfilerStoprE   r?   r=   profile_stopr    s     r?   c               #   J   K   t                       dV  t                       dS )z]
    Context manager that enables profiling on entry and disables profiling on
    exit.
    N)r  r  rE   r?   r=   	profilingr    s'       OOO	EEENNNNNr?   c                  4    t                                           S )z@
    Return the driver version as a tuple of (major, minor)
    )r  r&  rE   r?   r=   r&  r&    s     r?   r  r'  )r(  r4   r^   rh   r  r   r  r'   r@  r  rX  	itertoolsr   abcr   r   r   r   r   r	   r
   r   r   r   r   r  r  numpyr  collectionsr   r   r  r   
numba.corer   r   r   r   r   r   r   r   r   r   r   numba.cuda.cudadrvr   r   r   CUDA_USE_NVIDIA_BINDINGr   r!   r   r  r  rg   r   rl  	pythonapi	Py_DecRefr  	Py_IncRefrz  	py_objectr   r>   rK  rA   rH   rJ   rs   r   r   r   r   rf   r}   r   r   r   r   r   r  r  rE  r{  r  r  r  r  r  r  r  r  r  r  r0   r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  rn  r  r  MemAllocr  r  r9  r[  rc  r  r  r  r  r  r  r  r  r  r  r  r  jittyCU_JIT_INPUT_OBJECTru  CU_JIT_INPUT_LIBRARYCU_JIT_INPUT_CUBINCU_JIT_INPUT_FATBINARYr9  r  rE  r  r  r  r  r  r  r  r  rT  r  r  r  r/  rK  r  r  r  r  r  r  r  r  r  r  r  r&  rE   r?   r=   <module>r     s    


 				                     ' ' ' ' ' ' ' '/ / / / / / / / / / / / / / / / / / / / / /             ) ) ) ) ) ) ) )       / / / / / / / / / / 4 4 4 4 4 4 4 4 " " " " " " L L L L L L L L L L < < < < < < < < < < < </ $$$$$$ |&&w// '
'
'(
 '(
   0	 	 	 	 	l 	 	 		 	 	 	 	, 	 	 	1 1 1 1 1? 1 1 1(  (  ( V$ $ $0    1 1 16 6 6     %$&&	B B B B BV B B BJ4 4 4 4 4V 4 4 4n 
   0/11 Y1 Y1 Y1 Y1 Y1V Y1 Y1 Y1x: : :q q q q qFg q q q qh@ @ @ @ @ 5 @ @ @F( ( ( ( ( ( ( (2.0 .0 .0 .0 .0.0I .0 .0 .0b $%  7 7 7$     *	 	 	 	 	# 	 	 	 kmmB B B B Bv B B BJ Zl33
	i& i& i& i& i&f i& i& i&X	8 8 8< < <> @  @  @F	 	 	  .  .      0!  !  !  !  ! 6 !  !  ! H' ' ' ' 'V ' ' 'Tv; v; v; v; v; v; v; v;ro- o- o- o- o-F o- o- o-d    m   +7 +7 +7 +7 +7? +7 +7 +7\& & & & &8$ & & &R#8 #8 #8 #8 #8O #8 #8 #8L* * * * *6 * * *>	 	 	 	 	x'8 	 	 		 	 	 	 	,(9 	 	 	w w w w wV w w wt70 70 70 70 70F 70 70 70t	 	 	C C C C Cw C C C C.B B B B B6 B B B < < < < <v < < < :j #1 #1 #1 2 2        D+ + + + +X + + +>+ + + + + + + +J $ %  %  %  %F  "E&%')).  &%')). O O O O Ow O O O Of V% V% V% V% V% V% V% V%r]F ]F ]F ]F ]F6 ]F ]F ]F@WF WF WF WF WFv WF WF WFz* * *,  &  "4 4 4  H H H H(0 0 0    0 0 0% % %2 2 25 5 5  N N N N$? ? ? ?$A A A A$1 1 1 1*                r?   