
    J/Ph7x                     x   d Z ddlmZmZ ddlmZ ddlZddlZddlm	Z	 ddl
ZddlmZmZ ddlmZmZ ddlmZ dd	lmZ d
 Zd Zd Z G d de          Zd Z G d de          Z G d de          Zd Zd Zd Z d Z! G d de          Z" G d de          Z# G d de          Z$ G d de           Z%dS )!zA
Implements custom ufunc dispatch mechanism for non-CPU devices.
    )ABCMetaabstractmethod)OrderedDictN)reduce)_BaseUFuncBuilderparse_identity)typessigutils)	signatureparse_signaturec                 x    | |k    r| S | dk    r|S |dk    r| S t          d                    | |                    )=
    Raises
    ------
    ValueError if broadcast fails
       zfailed to broadcast {0} and {1})
ValueErrorformat)abs     V/var/www/html/test/jupyter/venv/lib/python3.11/site-packages/numba/cuda/deviceufunc.py_broadcast_axisr      sM     	Avv	
a	
a:AA!QGGHHH    c                    t          t          | |g          \  } }t          |           t          |          k     r%d| z   } t          |           t          |          k     %t          |           t          |          k    r%d|z   }t          |           t          |          k    %t          d t          | |          D                       S )r   r   c              3   <   K   | ]\  }}t          ||          V  d S N)r   ).0r   r   s      r   	<genexpr>z&_pairwise_broadcast.<locals>.<genexpr>1   s0      GG41aA&&GGGGGGr   )maptuplelenzip)shape1shape2s     r   _pairwise_broadcastr$   #   s      011NFF
f++F
#
# f++F
#
# f++F
#
# f++F
#
# GG3vv3F3FGGGGGGr   c                      | sJ | d         }| dd         }	 t          |d          D ]\  }}t          ||          }	 |S # t          $ r# t          d                    |                    w xY w)r   r   r   N)startz!failed to broadcast argument #{0})	enumerater$   r   r   )	shapelistresultothersieachs        r   _multi_broadcastr-   4   s     q\Fqrr]F q111 	7 	7GAt(66FF	7
   H H H<CCAFFGGGHs   'A -A/c                       e Zd ZdZdZdZd Zd Zd Zd Z	d Z
d	 Zd
 Zd Zd Zd Zd Zd Zed             Zd Zd Zd Zd ZdS )UFuncMechanismz0
    Prepare ufunc arguments for vectorize.
    NFc                     || _         || _        t          | j                  }dg|z  | _        g | _        d| _        dg|z  | _        dS )zFNever used directly by user. Invoke by UFuncMechanism.call().
        N)typemapargsr    argtypes	scalarposr   arrays)selfr1   r2   nargss       r   __init__zUFuncMechanism.__init__N   sK     	DIfunr   c                 h   t          | j                  D ]\  }}|                     |          r|                     |          | j        |<   8t          |t          t          t          t          j
        f          r| j                            |           t          j        |          | j        |<   dS )z1
        Get all arguments in array form
        N)r'   r2   is_device_arrayas_device_arrayr5   
isinstanceintfloatcomplexnpnumberr4   appendasarray)r6   r+   args      r   _fill_arrayszUFuncMechanism._fill_arraysY   s      	** 	1 	1FAs##C(( 1!%!5!5c!:!:AC#ugry!ABB 1%%a((((!#CA	1 	1r   c                     t          | j                  D ]<\  }}|5t          |d          }|t          j        |          j        }|| j        |<   =dS )z
        Get dtypes
        Ndtype)r'   r5   getattrr@   rC   rG   r3   )r6   r+   aryrG   s       r   _fill_argtypeszUFuncMechanism._fill_argtypesf   sa      ,, 	) 	)FAsW--=JsOO1E#(a 	) 	)r   c                 n   g }| j         r| j        D ]}g }t          t          || j                            D ]G\  }\  }}|$t          j        | j        |                   j        }|	                    ||k               Ht          |          r|	                    |           |sOg }| j        D ]E}t          d t          || j                  D                       }|r|	                    |           F|st          d          t          |          dk    rt          d          |d         | _        dS )z<Resolve signature.
        May have ambiguous case.
        Nc              3   0   K   | ]\  }}|d u p||k    V  d S r    )r   formalactuals      r   r   z4UFuncMechanism._resolve_signature.<locals>.<genexpr>   sP       "A "A&4ff #)D."DFf4D "A "A "A "A "A "Ar   zNo matching version.  GPU ufunc requires array arguments to have the exact types.  This behaves like regular ufunc with casting='no'.r   zqFailed to resolve ufunc due to ambiguous signature. Too many untyped scalars. Use numpy dtype object to type tag.r   )r4   r1   r'   r!   r3   r@   rC   r2   rG   rB   all	TypeErrorr    )r6   matches	formaltys	match_mapr+   rN   rO   all_matchess           r   _resolve_signaturez!UFuncMechanism._resolve_signatureq   s    > 	.!\ 
. 
.		+4S9=6H 6H ,I ,I 7 7'A'~!#DIaL!9!9!?$$Vv%56666y>> .NN9---  	.G!\ . .	! "A "A"%i"?"?"A "A "A A A  .NN9--- 	E D E E E w<<! B C C C
  
r   c                     | j         D ]7}t          j        | j        |         g| j        |                   | j        |<   8| j        S )zPReturn the actual arguments
        Casts scalar arguments to np.array.
        rG   )r4   r@   arrayr2   r3   r5   )r6   r+   s     r   _get_actual_argszUFuncMechanism._get_actual_args   sJ      	N 	NAXty|nDM!<LMMMDKNN{r   c                 6  	
 d |D             }t          | 
t          |          D ]\  }		j        
k    r|                     	          r|                     	
          ||<   @	
fdt          t          
                    D             }t          
          t          	j                  z
  }dg|z  t          	j                  z   }|D ]}d||<   t          j
        j                            	
|          }|                     |          ||<   |S )z)Perform numpy ufunc broadcasting
        c                     g | ]	}|j         
S rM   shaper   r   s     r   
<listcomp>z-UFuncMechanism._broadcast.<locals>.<listcomp>   s    +++QW+++r   c                 X    g | ]&}|j         k    sj        |         |         k    $|'S rM   )ndimr^   )r   axrI   r^   s     r   r`   z-UFuncMechanism._broadcast.<locals>.<listcomp>   sE     "A "A "A%'38^^%(Yr]eBi%?%? #%%?%?%?r   r   )r^   strides)r-   r'   r^   r:   broadcast_deviceranger    listrd   r@   libstride_tricks
as_stridedforce_array_layout)r6   arysr(   r+   
ax_differs
missingdimrd   rc   stridedrI   r^   s            @@r   
_broadcastzUFuncMechanism._broadcast   sY    ,+d+++	 ),oo 	? 	?FAsyE!! '',, ?"33C??DGG"A "A "A "A "AuSZZ/@/@ "A "A "AJ "%Uc#)nn!<J cJ.ck1B1BBG( ( (&' f2==cDIFM > O OG #55g>>DGGr   c                     |                                   |                                  |                                  |                                 }|                     |          S )z[Prepare and return the arguments for the ufunc.
        Does not call to_device().
        )rE   rJ   rV   rZ   rp   )r6   rl   s     r   get_argumentszUFuncMechanism.get_arguments   s]     	!!!$$&&t$$$r   c                 &    | j         | j                 S )z)Returns (result_dtype, function)
        )r1   r3   r6   s    r   get_functionzUFuncMechanism.get_function   s     |DM**r   c                     dS )zBIs the `obj` a device array?
        Override in subclass
        FrM   r6   objs     r   r:   zUFuncMechanism.is_device_array   s	     ur   c                     |S )zConvert the `obj` to a device array
        Override in subclass

        Default implementation is an identity function
        rM   rw   s     r   r;   zUFuncMechanism.as_device_array   s	     
r   c                      t          d          )zTHandles ondevice broadcasting

        Override in subclass to add support.
        z'broadcasting on device is not supportedNotImplementedErrorr6   rI   r^   s      r   re   zUFuncMechanism.broadcast_device   s    
 ""KLLLr   c                     |S )zSEnsures array layout met device requirement.

        Override in sublcass
        rM   )r6   rI   s     r   rk   z!UFuncMechanism.force_array_layout   s	    
 
r   c                    |                     d| j                  |                     dd          }|r*t          j        dd                    |          z              | ||                                          }                                \  }}|d         j        }|*                    |          r	                    |          }fd|d         j
        dk    rfd	|D             }g }d
}	|D ][}
                    |
          r|                    |
           d}	/                    |
          }|                    |           \|d         j        }|                    ||          }|                    |g                               ||d         |           |	r|                    |          S |                                                    |          S                     |          ra|j
        dk    r |          }|}|                    |g                               ||d         |           |                    |          S |j        |k    sJ |j        |k    sJ                     ||          }|                    |g                               ||d         |           |                    |                              |          S )z1Perform the entire ufunc call mechanism.
        streamoutNzunrecognized keywords: %s, r   c                    j         rt          	 |                                 S # t          $ rW                     |           s                     |                                           }                    |          cY S w xY wr   )SUPPORT_DEVICE_SLICINGr|   ravelr:   to_host	to_device)r   hostarycrr   s     r   attempt_ravelz*UFuncMechanism.call.<locals>.attempt_ravel  s    ( *))9wwyy & 9 9 9))!,, 9 !jjF3399;;G<<888889s   % ABBr   c                 &    g | ]} |          S rM   rM   )r   r   r   s     r   r`   z'UFuncMechanism.call.<locals>.<listcomp>  s#    333MM!$$333r   FT)r   )popDEFAULT_STREAMwarningswarnjoinrr   ru   r^   r:   r;   rb   rB   r   allocate_device_arrayextendlaunchreshapecopy_to_hostrG   )clsr1   r2   kwsr   restyfuncoutshapedevarys
any_devicer   dev_ar^   devoutr   r   r   s                 @@@r   callzUFuncMechanism.call   s2   
 3#566ggeT"" 	HM5		#FGGG S$!!oo''t7= ?r11#66?$$S))C	9 	9 	9 	9 	9 	9" 7<!3333d333D 
 	& 	&A!!!$$ &q!!!!

Qv66u%%%% Q;--eU6-JJFNNF8$$$IIdE!Hfg666 ? ~~h/// **,,44X>>>$$ 	M x!||#mC((FNNF8$$$IIdE!Hfg666>>(+++
 9%%%%9%%%%--eU6-JJFNNF8$$$IIdE!Hfg666&&s6&::BB8LLLr   c                     t           )zBImplement to device transfer
        Override in subclass
        r{   )r6   r   r   s      r   r   zUFuncMechanism.to_deviceK  
     "!r   c                     t           )z@Implement to host transfer
        Override in subclass
        r{   )r6   devaryr   s      r   r   zUFuncMechanism.to_hostQ  r   r   c                     t           )zBImplements device allocation
        Override in subclass
        r{   )r6   r^   rG   r   s       r   r   z$UFuncMechanism.allocate_device_arrayW  r   r   c                     t           )zKImplements device function invocation
        Override in subclass
        r{   )r6   r   countr   r2   s        r   r   zUFuncMechanism.launch]  r   r   )__name__
__module____qualname____doc__r   r   r8   rE   rJ   rV   rZ   rp   rr   ru   r:   r;   re   rk   classmethodr   r   r   r   r   rM   r   r   r/   r/   G   sB         N"	% 	% 	%1 1 1	) 	) 	))# )# )#V    B% % %+ + +
    M M M   WM WM [WMr" " "" " "" " "" " " " "r   r/   c                     t          | t          j                  r| j        } t	          j        t          |                     S r   )r<   r	   
EnumMemberrG   r@   str)tys    r   to_dtyper   d  s4    "e&'' X8CGGr   c                   X    e Zd Zddi fdZed             ZddZd Zd Zd Z	d	 Z
d
 ZdS )DeviceVectorizeNFc                     |rt          d          |D ];}|dk    rt          j        dt                     #d}|dz  }t	          ||z            || _        t          |          | _        t                      | _	        d S )Ncaching is not supportednopythonz+nopython kwarg for cuda target is redundantzUnrecognized options. z3cuda vectorize target does not support option: '%s')
rQ   r   r   RuntimeWarningKeyErrorpy_funcr   identityr   	kernelmap)r6   r   r   cachetargetoptionsoptfmts          r   r8   zDeviceVectorize.__init__k  s     	86777  	* 	*Cj  K,. . . . /LLsSy)))&x00$r   c                     | j         S r   r   rt   s    r   pyfunczDeviceVectorize.pyfunc{  
    |r   c                 @   t          j        |          \  }}t          |g|R  }| j        j        }|                     | j        ||          }|                     |          \  }}|                     |          }t          t          j
        gd |D             |d d          gz   R  }t          ||           |d|z           }	|                     |	|          }
t          d |j        D                       }t          |          }||
f| j        t          |          <   d S )Nc                 "    g | ]}|d d          S r   rM   r_   s     r   r`   z'DeviceVectorize.add.<locals>.<listcomp>  s     &:&:&:qt&:&:&:r   z__vectorized_%sc              3   4   K   | ]}t          |          V  d S r   )r   r   ts     r   r   z&DeviceVectorize.add.<locals>.<genexpr>  s(      ==!(1++======r   )r
   normalize_signaturer   r   r   _get_kernel_source_kernel_template_compile_core_get_globalsr	   voidexec_compile_kernelr   r2   r   r   )r6   sigr2   return_typedevfnsigfuncnamekernelsourcecorefnglblstagerkernel	argdtypesresdtypes                r   addzDeviceVectorize.add  s7   $8==k[04000;'..t/D/7C C"00::  ((
O&:&:T&:&:&:k!!!n=M&MOOO\4   '(23%%fc22==x}=====	K((+3V+;uY''(((r   c                     t           r   r{   rt   s    r   build_ufunczDeviceVectorize.build_ufunc      !!r   c                     d t          t          |j                            D             }t          |d                    |          d                    d |D                                 } |j        di |S )Nc                     g | ]}d |z  S )za%drM   r   r+   s     r   r`   z6DeviceVectorize._get_kernel_source.<locals>.<listcomp>  s    888a	888r   r   c              3       K   | ]	}d |z  V  
dS )z%s[__tid__]NrM   r   s     r   r   z5DeviceVectorize._get_kernel_source.<locals>.<genexpr>  s'      &G&GQ}q'8&G&G&G&G&G&Gr   )namer2   argitemsrM   )rf   r    r2   dictr   r   )r6   templater   r   r2   fmtss         r   r   z"DeviceVectorize._get_kernel_source  s    885SX#7#78884!YY&G&G$&G&G&GGGI I I x&&&&&r   c                     t           r   r{   r6   r   s     r   r   zDeviceVectorize._compile_core  r   r   c                     t           r   r{   )r6   r   s     r   r   zDeviceVectorize._get_globals  r   r   c                     t           r   r{   r6   fnobjr   s      r   r   zDeviceVectorize._compile_kernel  r   r   r   )r   r   r   r8   propertyr   r   r   r   r   r   r   rM   r   r   r   r   j  s        &*%r ' ' ' '    X< < < <(" " "' ' '" " "" " "" " " " "r   r   c                   H    e Zd Zddi dfdZed             Zd	dZd Zd ZdS )
DeviceGUFuncVectorizeNFrM   c                    |rt          d          |rt          d          |                    dd          st          d          |rUd                    d |                                D                       }d}t          |                    |                    || _        t          |          | _        || _        t          | j                  \  | _
        | _        t                      | _        d S )	Nr   zwritable_args are not supportedr   Tznopython flag must be Truer   c                 ,    g | ]}t          |          S rM   )repr)r   ks     r   r`   z2DeviceGUFuncVectorize.__init__.<locals>.<listcomp>  s    DDD!d1ggDDDr   z3The following target options are not supported: {0})rQ   r   r   keysr   r   r   r   r   r   inputsig	outputsigr   r   )	r6   r   r   r   r   r   writable_argsoptsr   s	            r   r8   zDeviceGUFuncVectorize.__init__  s     	86777 	?=>>>   T22 	:8999 	.99DD}/A/A/C/CDDDEEDGCCJJt,,---&x00(7(G(G%t~ %r   c                     | j         S r   r   rt   s    r   r   zDeviceGUFuncVectorize.pyfunc  r   r   c                    d | j         D             }d | j        D             }t          j        |          \  }}|t          j        d fv }|st          d| d| d          | j        j        }t          | j
        ||||          }|                     |          }	t          ||	           |	d                    |                   }
t          t          |||z                       }|                     |
t#          |                    }t%          |          }d	 |D             }t#          |d |                    }t#          || d                    }||f| j        |<   d S )
Nc                 ,    g | ]}t          |          S rM   r    r   xs     r   r`   z-DeviceGUFuncVectorize.add.<locals>.<listcomp>  s    000Q#a&&000r   c                 ,    g | ]}t          |          S rM   r   r   s     r   r`   z-DeviceGUFuncVectorize.add.<locals>.<listcomp>  s    222a3q66222r   z7guvectorized functions cannot return values: signature z specifies z return typez__gufunc_{name})r   )r   c                 Z    g | ](}t          j        t          |j                            )S rM   )r@   rG   r   r   s     r   r`   z-DeviceGUFuncVectorize.add.<locals>.<listcomp>  s*    ;;;Q"(3qw<<((;;;r   )r   r   r
   r   r	   nonerQ   r   r   expand_gufunc_templater   r   r   r   rg   _determine_gufunc_outer_typesr   r   r    r   )r6   r   indimsoutdimsr2   r   valid_return_typer   srcglblsr   outertysr   noutdtypesindtypes	outdtypess                    r   r   zDeviceGUFuncVectorize.add  s   00$-000224>222$8==k (EJ+==  	$ #),# #9D# # # $ $ $ <($T%:F%,h> > !!#&&S%'..H.==>5dFW<LMMNN%%ex%AA7||;;(;;;$((&$.))	#,f#4x   r   c                     t           r   r{   r   s      r   r   z%DeviceGUFuncVectorize._compile_kernel  r   r   c                     t           r   r{   r   s     r   r   z"DeviceGUFuncVectorize._get_globals  r   r   r   )	r   r   r   r8   r   r   r   r   r   rM   r   r   r   r     s        +/uB!' ' ' '0   X5 5 5 5@" " "" " " " "r   r   c              #      K   t          | |          D ]i\  }}t          |t          j                  r|                    |dz             V  ;|dk    rt          d          t          j        |dd          V  jd S )Nr   )rb   r   z,gufunc signature mismatch: ndim>0 for scalarA)rG   rb   layout)r!   r<   r	   Arraycopyr   )argtysdimsatnds       r   r  r    s      fd## < <Bb%+&& 	<''rAv'&&&&&&Avv !OPPP+BQs;;;;;;;< <r   c           	         ||z   }d t          t          |                    D             }d                    d                    d |D                                 }d t	          |||          D             }d t	          |t          |          d         ||t          |          d                   D             }	||	z   }
|                     |d                    |          |d                    |
                    }|S )	z"Expand gufunc source template
    c                 8    g | ]}d                      |          S )zarg{0}r   r   s     r   r`   z*expand_gufunc_template.<locals>.<listcomp>  s$    @@@q""@@@r   zmin({0})r   c                 8    g | ]}d                      |          S )z{0}.shape[0]r  r_   s     r   r`   z*expand_gufunc_template.<locals>.<listcomp>  s<     .A .A .A23 /=.C.CA.F.F .A .A .Ar   c                 8    g | ]\  }}}t          |||          S rM   _gen_src_for_indexingr   arefadimsatypes       r   r`   z*expand_gufunc_template.<locals>.<listcomp>  s@     I I I$$u $D%77 I I Ir   c                 8    g | ]\  }}}t          |||          S rM   r  r!  s       r   r`   z*expand_gufunc_template.<locals>.<listcomp>  s@     F F F%4 %T5%88 F F Fr   N)r   r2   
checkedargr   )rf   r    r   r   r!   )r   r  r  r   r3   argdimsargnamesr&  inputsoutputsr   r  s               r   r  r    sI    wG@@E#g,,,?,?@@@H""499 .A .A7?.A .A .A $B $B C CJI I(+Hfh(G(GI I IFF F),Xc&kkll-CW-5c&kkll-C*E *EF F FG H
//xdii.A.A%/#'99X#6#6  8 8C Jr   c                 L    d                     | t          ||                    S )Nz{aref}[{sliced}])r"  sliced)r   _gen_src_index)r"  r#  r$  s      r   r   r     s1    $$$,:5%,H,H % J J Jr   c                     | dk    rd                     dgdg| z  z             S t          |t          j                  r|j        dz
  | k    rdS dS )Nr   ,__tid__:r   z__tid__:(__tid__ + 1))r   r<   r	   r  rb   )r#  r$  s     r   r-  r-    s[    qyyxxseem3444	E5;	'	' EJNe,C,C '&yr   c                   4    e Zd ZdZed             Zd Zd ZdS )GUFuncEnginezZDetermine how to broadcast and execute a gufunc
    base on input shape and signature
    c                 &     | t          |           S r   r   )r   r   s     r   from_signaturezGUFuncEngine.from_signature  s    sOI..//r   c                     || _         || _        t          | j                   | _        t          | j                  | _        d S r   )sinsoutr    ninr  )r6   r   r   s      r   r8   zGUFuncEngine.__init__"  s2    	tx==	NN			r   c                 4   t          |          | j        k    rt          d          i }g }g }t          t	          || j                            D ]\  }\  }}|dz  }t          |          }t          |          |k     rd}	t          |	|fz            |r|| d          }
|d |          }nd}
|}t          t	          |
|                    D ]E\  }\  }}|t          |          z  }||v r"||         |k    rd}	t          |	||fz            |||<   F|                    |           |                    |
           g }| j        D ]F}g }|D ]}|                    ||                    |                    t          |                     Gd |D             }t          j        |          }||         }dg| j        z  }t          |          D ]5\  }}||k    r*|dk    s|dk    rd	||<   d
}	t          |	|dz   fz            6t          | ||||          S )Nz invalid number of input argumentr   z%arg #%d: insufficient inner dimensionrM   z$arg #%d: shape[%d] mismatch argumentc                 D    g | ]}t          t          j        |d           S r   )r   operatormul)r   ss     r   r`   z)GUFuncEngine.schedule.<locals>.<listcomp>T  s&    BBBa++BBBr   Fr   Tz!arg #%d: outer dimension mismatch)r    r9  rQ   r'   r!   r7  r   rB   r8  r   r@   argmaxGUFuncSchedule)r6   ishapes	symbolmapouter_shapesinner_shapesargnr^   symbols
inner_ndimr   inner_shapeouter_shapeaxisdimsymoshapesoutsigoshapesizes	largest_iloopdimspinnedr+   ds                           r   schedulezGUFuncEngine.schedule*  s   w<<48##>??? 	&/GTX0F0F&G&G 	- 	-"D"5'AIDWJ5zzJ&&= w/// $#ZKLL1#Lj[L1 #$-c+w.G.G$H$H % % jsCK((()## ~,,D(d|);<<<!$	#,,,,,,, i 	* 	*FF . .in----NN5==)))) CB\BBBIe$$		*48#l++ 	5 	5DAqH}}99R $F1II=C$SAE8^444  dL'8VLLLr   N)r   r   r   r   r   r5  r8   rU  rM   r   r   r3  r3    s\          0 0 [0# # #7M 7M 7M 7M 7Mr   r3  c                       e Zd Zd Zd ZdS )r@  c                     || _         || _        || _        | _        t	          t
          j        d          | _        || _        fd|D             | _	        d S )Nr   c                     g | ]}|z   S rM   rM   )r   r>  rR  s     r   r`   z+GUFuncSchedule.__init__.<locals>.<listcomp>p  s    <<<qhl<<<r   )
parentrA  rM  rR  r   r<  r=  loopnrS  output_shapes)r6   rY  rA  rM  rR  rS  s       ` r   r8   zGUFuncSchedule.__init__e  s]     HL(A66
<<<<G<<<r   c                 p     dd l }d} fd|D             }|                    t          |                    S )Nr   )rA  rM  rR  rZ  rS  c                 4    g | ]}|t          |          fS rM   )rH   )r   r   r6   s     r   r`   z*GUFuncSchedule.__str__.<locals>.<listcomp>v  s(    777A1gdA&&'777r   )pprintpformatr   )r6   r^  attrsvaluess   `   r   __str__zGUFuncSchedule.__str__r  sB    C7777777~~d6ll+++r   N)r   r   r   r8   rb  rM   r   r   r@  r@  d  s2        = = =, , , , ,r   r@  c                   >    e Zd Zd Zd Zd Zd Zd Zd Zd Z	d Z
d	S )
GeneralizedUFuncc                 0    || _         || _        d| _        d S )Ni   @)r   enginemax_blocksize)r6   r   rf  s      r   r8   zGeneralizedUFunc.__init__{  s    "$r   c                    |                      | j        j        | j        j        ||          }|                     |j        |j                  \  }}}}|                    |           |                    ||          }|	                                }	| 
                    ||	|          }
|                    ||j        |
           |                    |          S r   )_call_stepsrf  r9  r  	_scheduler)  r*  adjust_input_typesprepare_outputsprepare_inputsrp   launch_kernelrZ  post_process_outputs)r6   r2   r   	callstepsr  rU  r  r   r*  r)  
parameterss              r   __call__zGeneralizedUFunc.__call__  s    $$T[_dk6F%)30 0	04i/11 11-(Iv$$X...++Hi@@))++__Xvw??

CCC--g666r   c                    d |D             }| j                             |          }t          d |D                       }	 | j        |         \  }}n5# t          $ r( |                     |          }| j        |         \  }}Y nw xY wt          |j        |          D ]!\  }}	|	||	j        k    rt          d          "||||fS )Nc                     g | ]	}|j         
S rM   r]   r_   s     r   r`   z.GeneralizedUFunc._schedule.<locals>.<listcomp>  s    000A000r   c              3   $   K   | ]}|j         V  d S r   rX   r   s     r   r   z-GeneralizedUFunc._schedule.<locals>.<genexpr>  s$      11Q111111r   zoutput shape mismatch)
rf  rU  r   r   r   _search_matching_signaturer!   r[  r^   r   )
r6   r)  outsinput_shapesrU  r  r  r   sched_shaper   s
             r   rj  zGeneralizedUFunc._schedule  s    00000;''55 11&11111	9 $x 8Ivv 	9 	9 	9 66x@@H $x 8Ivvv	9 !$H$:D A A 	: 	:K;#)#;#; !89999f44s   A /BBc                     | j                                         D ]-}t          d t          ||          D                       r|c S .t	          d          )z
        Given the input types in `idtypes`, return a compatible sequence of
        types that is defined in `kernelmap`.

        Note: Ordering is guaranteed by `kernelmap` being a OrderedDict
        c              3   F   K   | ]\  }}t          j        ||          V  d S r   )r@   can_cast)r   rO   desireds      r   r   z>GeneralizedUFunc._search_matching_signature.<locals>.<genexpr>  sF       = =&vw ;vw// = = = = = =r   zno matching signature)r   r   rP   r!   rQ   )r6   idtypesr   s      r   rv  z+GeneralizedUFunc._search_matching_signature  sx     >&&(( 	5 	5C = =*-c7*;*;= = = = = 


 3444r   c                    |j         dk    s
J d            |j        sdn|j         }g }t          ||j                  D ]h\  }}|s7|j        dk    r,|                     ||          }|                    |           >|                    |                     |||                     ig }	t          ||j                  D ]&\  }
}|	                     |
j	        |g|R             't          |          t          |	          z   S )Nr   zzero looping dimensionr   )rZ  rR  r!   rA  size_broadcast_scalar_inputrB   _broadcast_arrayrM  r   r   )r6   rU  paramsretvalsodim	newparamspcsr   
newretvalsretvalrO  s               r   rp   zGeneralizedUFunc._broadcast  s-   ~!!!#;!!! )=qqx~	!122 	E 	EEAr E!&A++55a>>  ((((   !6!6q$!C!CDDDD
!'8+;<< 	= 	=NFFnfnT;F;;;<<<<Y%
"3"333r   c                    |f|z   }|j         |k    r|S t          |j                   t          |          k     rF|t          |j                    d          |j         k    s
J d            |                     ||          S  |j        | S )Nz+cannot add dim and reshape at the same time)r^   r    _broadcast_add_axisr   )r6   rI   newdiminnerdimnewshapes        r   r  z!GeneralizedUFunc._broadcast_array  s    9x'9  J ^^c(mm++S^^O,,-:::= ;::++C::: 3;))r   c                      t          d          )Nzcannot add new axisr{   )r6   rI   r  s      r   r  z$GeneralizedUFunc._broadcast_add_axis  s    !"7888r   c                     t           r   r{   r}   s      r   r  z(GeneralizedUFunc._broadcast_scalar_input  r   r   N)r   r   r   r8   rr  rj  rv  rp   r  r  r  rM   r   r   rd  rd  z  s        % % %
7 7 75 5 5.5 5 54 4 4&* * * 9 9 9" " " " "r   rd  c                       e Zd ZdZg dZed             Zed             Zed             Zed             Z	ed             Z
d Zd	 Zd
 Zd Zd ZdS )GUFuncCallStepsab  
    Implements memory management and kernel launch operations for GUFunc calls.

    One instance of this class is instantiated for each call, and the instance
    is specific to the arguments given to the GUFunc call.

    The base class implements the overall logic; subclasses provide
    target-specific implementations of individual functions.
    )r*  r)  _copy_result_to_hostc                     dS )zImplement the kernel launchNrM   )r6   r   nelemr2   s       r   rn  zGUFuncCallSteps.launch_kernel        r   c                     dS )zb
        Return True if `obj` is a device array for this target, False
        otherwise.
        NrM   rw   s     r   r:   zGUFuncCallSteps.is_device_array  r  r   c                     dS )z
        Return `obj` as a device array on this target.

        May return `obj` directly if it is already on the target.
        NrM   rw   s     r   r;   zGUFuncCallSteps.as_device_array  r  r   c                     dS )zK
        Copy `hostary` to the device and return the device array.
        NrM   )r6   r   s     r   r   zGUFuncCallSteps.to_device  r  r   c                     dS )zc
        Allocate a new uninitialized device array with the given shape and
        dtype.
        NrM   )r6   r^   rG   s      r   r   z%GUFuncCallSteps.allocate_device_array	  r  r   c           
          |                     d          }|_t          |          |||z   fvrId }d ||           d |||z              d |t          |                     d}t          |          |"t          |          |k    rt          d          |g|z  }d}g  _        |D ]a}	                     |	          r0 j                                                 |	                     d	}G j                            |	           bt           fd
|D                        }
|
o| _	         fdfd|D             }|d |          _
        ||d          }|r	| _        d S d S )Nr   c                     |  dd| dk    z   S )Nz positional argumentr>  r   rM   )ns    r   pos_argnz*GUFuncCallSteps.__init__.<locals>.pos_argn  s    AAQAAAr   zThis gufunc accepts z  (when providing input only) or z( (when providing input and output). Got .z<cannot specify argument 'out' as both positional and keywordTFc                 :    g | ]}                     |          S rM   )r:   )r   r   r6   s     r   r`   z,GUFuncCallSteps.__init__.<locals>.<listcomp>2  s'    "I"I"Iq4#7#7#:#:"I"I"Ir   c                 l                         |           rj        }nt          j        } ||           S r   )r:   r;   r@   rC   )r   convertr6   s     r   normalize_argz/GUFuncCallSteps.__init__.<locals>.normalize_arg;  s8    ##A&& %.*71::r   c                 &    g | ]} |          S rM   rM   )r   r   r  s     r   r`   z,GUFuncCallSteps.__init__.<locals>.<listcomp>C  s#    :::==++:::r   )getr    rQ   r   r*  r:   rB   r;   anyr  r)  )r6   r9  r  r2   kwargsr*  r  msgall_user_outputs_are_hostoutputall_host_arraysnormalized_argsunused_inputsr  s   `            @r   r8   zGUFuncCallSteps.__init__  s   **U## ?s4yysTz0CCCB B BD((3-- D D%-XcDj%9%9D D-5Xc$ii-@-@D D DC C.. 3t99s?? + , , , i$&G
 %)! 	, 	,F##F++ ,##D$8$8$@$@AAA,1))##F++++!"I"I"I"ID"I"I"IJJJ &5 &?%> 	!
	 	 	 	 	 ;:::T:::%dsd+ (- 	)(DLLL	) 	)r   c                 .   t          t          || j                            D ]q\  }\  }}||j        k    r^t	          |d          s1d                    t          |                    }t          |          |                    |          | j        |<   rdS )z
        Attempt to cast the inputs to the required types if necessary
        and if they are not device arrays.

        Side effect: Only affects the elements of `inputs` that require
        a type cast.
        astypezNcompatible signature is possible by casting but {0} does not support .astype()N)	r'   r!   r)  rG   hasattrr   typerQ   r  )r6   r  r+   ityvalr  s         r   rk  z"GUFuncCallSteps.adjust_input_typesK  s     's8T['A'ABB 	1 	1MAzScisH-- )<=CVDII=N=N #C..(!$CA	1 	1r   c                     g }t          |j        || j                  D ]:\  }}}|| j        r|                     ||          }|                    |           ;|S )z
        Returns a list of output parameters that all reside on the target
        device.

        Outputs that were passed-in to the GUFunc are used if they reside on the
        device; other outputs are allocated as necessary.
        )r!   r[  r*  r  r   rB   )r6   rU  r  r*  r^   rG   r  s          r   rl  zGUFuncCallSteps.prepare_outputs\  sp     $'(>	(,%6 %6 	# 	# E5&~!:~33E5AANN6""""r   c                 6      fdfd j         D             S )zZ
        Returns a list of input parameters that all reside on the target device.
        c                 b                         |           rj        }nj        } ||           S r   )r:   r;   r   )	parameterr  r6   s     r   ensure_devicez5GUFuncCallSteps.prepare_inputs.<locals>.ensure_deviceq  s:    ##I.. )..79%%%r   c                 &    g | ]} |          S rM   rM   )r   r  r  s     r   r`   z2GUFuncCallSteps.prepare_inputs.<locals>.<listcomp>y  s#    666Qa  666r   )r)  )r6   r  s   `@r   rm  zGUFuncCallSteps.prepare_inputsm  s;    	& 	& 	& 	& 	& 7666$+6666r   c                       j         r" fdt          | j                  D             }n j        d          j        }t          |          dk    r|d         S t	          |          S )a+  
        Moves the given output(s) to the host if necessary.

        Returns a single value (e.g. an array) if there was one output, or a
        tuple of arrays if there were multiple. Although this feels a little
        jarring, it is consistent with the behavior of GUFuncs in general.
        c                 B    g | ]\  }}                     ||          S rM   )r   )r   r  self_outputr6   s      r   r`   z8GUFuncCallSteps.post_process_outputs.<locals>.<listcomp>  sA     N N N.6; ||FK88 N N Nr   r   Nr   )r  r!   r*  r    r   )r6   r*  s   ` r   ro  z$GUFuncCallSteps.post_process_outputs{  s     $ 	#N N N N25gt|2L2LN N NGG\!_(lGw<<11:>>!r   N)r   r   r   r   	__slots__r   rn  r:   r;   r   r   r8   rk  rl  rm  ro  rM   r   r   r  r    s          I * * ^*   ^   ^   ^
   ^9) 9) 9)v1 1 1"  "7 7 7" " " " "r   r  )	metaclass)&r   abcr   r   collectionsr   r<  r   	functoolsr   numpyr@   numba.np.ufunc.ufuncbuilderr   r   
numba.corer	   r
   numba.core.typingr   numba.np.ufunc.sigparser   r   r$   r-   objectr/   r   r   r   r  r  r   r-  r3  r@  rd  r  rM   r   r   <module>r     s    ( ' ' ' ' ' ' ' # # # # # #             I I I I I I I I & & & & & & & & ' ' ' ' ' ' 3 3 3 3 3 3I I I H H H"  &Z" Z" Z" Z" Z"V Z" Z" Z"z  :" :" :" :" :"' :" :" :"zA" A" A" A" A"- A" A" A"H< < <  &J J J
	 	 	HM HM HM HM HM6 HM HM HMV, , , , ,V , , ,,a" a" a" a" a"v a" a" a"Hn" n" n" n" n" n" n" n" n" n" n"r   