
    J/Ph"                        d dl mZ d dlmZ d dlmZ d dlmZm	Z	m
Z
  G d de          Z G d de
          Z G d	 d
e	          Z G d de          ZdZ G d dej                  ZdZ G d dej                  ZdS )    )cuda)array)deviceufunc)UFuncMechanismGeneralizedUFuncGUFuncCallStepsc                   ,    e Zd ZdZd Zd ZddZd ZdS )	CUDAUFuncDispatcherzD
    Invoke the CUDA ufunc specialization for the given inputs.
    c                 ,    || _         |j        | _        d S N)	functions__name__)selftypes_to_retty_kernelspyfuncs      V/var/www/html/test/jupyter/venv/lib/python3.11/site-packages/numba/cuda/vectorizers.py__init__zCUDAUFuncDispatcher.__init__   s    /    c                 D    t                               | j        ||          S )a  
        *args: numpy arrays or DeviceArrayBase (created by cuda.to_device).
               Cannot mix the two types in one call.

        **kws:
            stream -- cuda stream; when defined, asynchronous mode is used.
            out    -- output array. Can be a numpy array or DeviceArrayBase
                      depending on the input arguments.  Type must match
                      the input arguments.
        )CUDAUFuncMechanismcallr   )r   argskwss      r   __call__zCUDAUFuncDispatcher.__call__   s     "&&t~tSAAAr   r   c                    t          t          | j                                                  d                   dk    s
J d            |j        dk    s
J d            |j        d         }g }|dk    rt          d          |dk    r|d         S |pt          j                    }|	                                5  t          j
        j                            |          r|}nt          j        ||          }|                     |||          }t          d|j                  }|                    ||	           d d d            n# 1 swxY w Y   |d         S )
Nr      zmust be a binary ufunc   zmust use 1d arrayzReduction on an empty array.)r   )dtypestream)lenlistr   keysndimshape	TypeErrorr   r    auto_synchronizecudadrvdevicearrayis_cuda_ndarray	to_device_CUDAUFuncDispatcher__reducenp_arrayr   copy_to_host)r   argr    ngpu_memsmemoutbufs           r   reducezCUDAUFuncDispatcher.reduce   s   4++--..q122a777 :A777x1}}}1}}}IaL66:;;;!VVq6M (4;==$$&& 
	1 
	1|'77<< 2nS&11--Xv66C4sy111CS000
	1 
	1 
	1 
	1 
	1 
	1 
	1 
	1 
	1 
	1 
	1 
	1 
	1 
	1 
	1 1vs   8BEE	E	c                    |j         d         }|dz  dk    r|                    |dz
            \  }}|                    |           |                    |           |                     |||          }|                    |            | ||||          S |                    |dz            \  }}	|                    |           |                    |	            | ||	||           |dz  dk    r|                     |||          S |S )Nr   r   r   )r3   r    )r%   splitappendr,   )
r   r2   r1   r    r0   fatcutthincutr3   leftrights
             r   __reducezCUDAUFuncDispatcher.__reduce;   s   IaLq5A::!iiA..OFGOOF###OOG$$$--&99COOC   4W#f====))AF++KD%OOD!!!OOE"""Du$v6666Avzz}}T8V<<<r   Nr   )r   
__module____qualname____doc__r   r   r5   r,    r   r   r
   r
      sb         ( ( (B B B   :    r   r
   c                   H     e Zd ZdgZ fdZd Zd Zd Zd Zd Z	d Z
 xZS )	_CUDAGUFuncCallSteps_streamc                     t                                          ||||           |                    dd          | _        d S )Nr    r   )superr   getrE   )r   ninnoutr   kwargs	__class__s        r   r   z_CUDAGUFuncCallSteps.__init__X   s:    dD&111zz(A..r   c                 *    t          j        |          S r   r   is_cuda_arrayr   objs     r   is_device_arrayz$_CUDAGUFuncCallSteps.is_device_array\       !#&&&r   c                 v    t           j        j                            |          r|S t          j        |          S r   r   r(   r)   r*   as_cuda_arrayrP   s     r   as_device_arrayz$_CUDAGUFuncCallSteps.as_device_array_   5     <#33C88 	J!#&&&r   c                 8    t          j        || j                  S Nr   )r   r+   rE   )r   hostarys     r   r+   z_CUDAGUFuncCallSteps.to_devicei   s    ~gdl;;;;r   c                 >    |                     || j                  }|S rZ   )r.   rE   )r   devaryr[   r3   s       r   to_hostz_CUDAGUFuncCallSteps.to_hostl   s!    !!'$,!??
r   c                 :    t          j        ||| j                  S N)r%   r   r    )r   device_arrayrE   )r   r%   r   s      r   allocate_device_arrayz*_CUDAGUFuncCallSteps.allocate_device_arrayp   s     uE$,OOOOr   c                 D     |                     || j                  |  d S rZ   )forallrE   )r   kernelnelemr   s       r   launch_kernelz"_CUDAGUFuncCallSteps.launch_kernels   s'    1eDL1148888r   )r   r?   r@   	__slots__r   rR   rW   r+   r^   rb   rg   __classcell__rL   s   @r   rD   rD   S   s        I/ / / / /' ' '' ' '< < <  P P P9 9 9 9 9 9 9r   rD   c                   @     e Zd Z fdZed             Zd Zd Z xZS )CUDAGeneralizedUFuncc                 d    |j         | _         t                                          ||           d S r   )r   rG   r   )r   	kernelmapenginer   rL   s       r   r   zCUDAGeneralizedUFunc.__init__x   s,    F+++++r   c                     t           S r   )rD   r   s    r   _call_stepsz CUDAGeneralizedUFunc._call_steps|   s    ##r   c                 f    t           j        j                            |d|j        |j                  S Nr>   r%   stridesr   gpu_data)r   r(   r)   DeviceNDArrayr   rw   )r   aryr%   s      r   _broadcast_scalar_inputz,CUDAGeneralizedUFunc._broadcast_scalar_input   s6    |'55E>B<?I?B| 6 M M 	Mr   c                     t          |          t          |j                  z
  }d|z  |j        z   }t          j        j                            |||j        |j                  S rt   )	r!   r%   rv   r   r(   r)   rx   r   rw   )r   ry   newshapenewax
newstridess        r   _broadcast_add_axisz(CUDAGeneralizedUFunc._broadcast_add_axis   s^    HCI.E\CK/
|'55H>H<?I?B| 6 M M 	Mr   )	r   r?   r@   r   propertyrr   rz   r   ri   rj   s   @r   rl   rl   w   s{        , , , , , $ $ X$M M MM M M M M M Mr   rl   c                   @    e Zd ZdZdZd Zd Zd Zd Zd Z	d Z
d	 Zd
S )r   z%
    Provide CUDA specialization
    r   c                 :     |                     ||          |  d S rZ   )rd   )r   funccountr    r   s        r   launchzCUDAUFuncMechanism.launch   s%    )E&))40000r   c                 *    t          j        |          S r   rN   rP   s     r   rR   z"CUDAUFuncMechanism.is_device_array   rS   r   c                 v    t           j        j                            |          r|S t          j        |          S r   rU   rP   s     r   rW   z"CUDAUFuncMechanism.as_device_array   rX   r   c                 .    t          j        ||          S rZ   )r   r+   )r   r[   r    s      r   r+   zCUDAUFuncMechanism.to_device   s    ~gf5555r   c                 .    |                     |          S rZ   )r.   )r   r]   r    s      r   r^   zCUDAUFuncMechanism.to_host   s    ""&"111r   c                 0    t          j        |||          S r`   )r   ra   )r   r%   r   r    s       r   rb   z(CUDAUFuncMechanism.allocate_device_array   s     uE&IIIIr   c                 N   fdt          t                              D             }t                    t          j                  z
  }dg|z  t          j                  z   }|D ]}d||<   t
          j        j                            |j	        j
                  S )Nc                 X    g | ]&}|j         k    sj        |         |         k    $|'S rB   )r$   r%   ).0axry   r%   s     r   
<listcomp>z7CUDAUFuncMechanism.broadcast_device.<locals>.<listcomp>   sB     5 5 5Rsx2%)33 333r   r   ru   )ranger!   r%   r"   rv   r   r(   r)   rx   r   rw   )r   ry   r%   
ax_differs
missingdimrv   r   s    ``    r   broadcast_devicez#CUDAUFuncMechanism.broadcast_device   s    5 5 5 5 55U#4#4 5 5 5
 ZZ#ci..0
#
"T#+%6%66 	 	BGBKK|'55E>E<?I?B| 6 M M 	Mr   N)r   r?   r@   rA   DEFAULT_STREAMr   rR   rW   r+   r^   rb   r   rB   r   r   r   r      s          N1 1 1' ' '' ' '6 6 62 2 2J J JM M M M Mr   r   z
def __vectorized_{name}({args}, __out__):
    __tid__ = __cuda__.grid(1)
    if __tid__ < __out__.shape[0]:
        __out__[__tid__] = __core__({argitems})
c                   <    e Zd Zd Zd Zd Zd Zed             ZdS )CUDAVectorizec                      t          j        |dd          | j                  }||j        |j                 j        j        fS )NT)deviceinline)r   jitr   	overloadsr   	signaturereturn_type)r   sigcudevfns      r   _compile_corezCUDAVectorize._compile_core   s?    9$(3tD999$+FF)#(3=IIIr   c                 |    | j         j                                        }|                    t          |d           |S )N__cuda____core__)r   __globals__copyupdater   )r   corefnglbls      r   _get_globalszCUDAVectorize._get_globals   sB    {&++--!') ) 	* 	* 	*r   c                 *    t          j        |          S r   r   r   r   fnobjr   s      r   _compile_kernelzCUDAVectorize._compile_kernel   s    xr   c                 6    t          | j        | j                  S r   )r
   rn   r   rq   s    r   build_ufunczCUDAVectorize.build_ufunc   s    "4>4;???r   c                     t           S r   )vectorizer_stager_sourcerq   s    r   _kernel_templatezCUDAVectorize._kernel_template   s    ''r   N)	r   r?   r@   r   r   r   r   r   r   rB   r   r   r   r      sq        J J J    @ @ @ ( ( X( ( (r   r   zy
def __gufunc_{name}({args}):
    __tid__ = __cuda__.grid(1)
    if __tid__ < {checkedarg}:
        __core__({argitems})
c                   6    e Zd Zd Zd Zed             Zd ZdS )CUDAGUFuncVectorizec                 x    t          j        | j        | j                  }t	          | j        || j                  S )N)rn   ro   r   )r   GUFuncEngineinputsig	outputsigrl   rn   r   )r   ro   s     r   r   zCUDAGUFuncVectorize.build_ufunc   s;    )$-HH#dn+1+/;8 8 8 	8r   c                 <     t          j        |          |          S r   r   r   s      r   r   z#CUDAGUFuncVectorize._compile_kernel   s    tx}}U###r   c                     t           S r   )_gufunc_stager_sourcerq   s    r   r   z$CUDAGUFuncVectorize._kernel_template   s    $$r   c                      t          j        |d          | j                  }| j        j                                        }|                    t           |d           |S )NT)r   r   )r   r   r   py_funcr   r   r   )r   r   r   glblss       r   r   z CUDAGUFuncVectorize._get_globals   sb    +#d+++DK88(--//$"(* * 	+ 	+ 	+r   N)r   r?   r@   r   r   r   r   r   rB   r   r   r   r      s\        8 8 8$ $ $ % % X%    r   r   N)numbar   numpyr   r-   
numba.cudar   numba.cuda.deviceufuncr   r   r   objectr
   rD   rl   r   r   DeviceVectorizer   r   DeviceGUFuncVectorizer   rB   r   r   <module>r      s         # # # # # # " " " " " "5 5 5 5 5 5 5 5 5 5H H H H H& H H HV!9 !9 !9 !9 !9? !9 !9 !9HM M M M M+ M M M2-M -M -M -M -M -M -M -M` ( ( ( ( (K/ ( ( (2     +;     r   