
    J/Pha)                     (   d dl mZ d dlZd dlZd dlZd dlZddlmZm	Z	 ddl
mZmZmZ ddlmZ ddlmZmZ 	 daed	             Zd
 Z G d d          Z G d de          Z G d de          Z G d dej                  Z G d de          ZdS )    )contextmanagerN   )FakeCUDAArrayFakeWithinKernelCUDAArray)Dim3FakeCUDAModuleswapped_cuda_module   )normalize_kernel_dimensions)wrap_argArgHintc              #   L   K   t           
J d            | a 	 dV  da dS # da w xY w)z*
    Push the current kernel context.
    Nz)concurrent simulated kernel not supported_kernel_context)mods    [/var/www/html/test/jupyter/venv/lib/python3.11/site-packages/numba/cuda/simulator/kernel.py_push_kernel_contextr      sJ       ""$O"""O$s    #c                      t           S )zT
    Get the current kernel context. This is usually done by a device function.
    r        r   _get_kernel_contextr   $   s
     r   c                       e Zd ZdZd ZdS )FakeOverloadzE
    Used only to provide the max_cooperative_grid_blocks method
    c                     dS )Nr   r   )selfblockdims     r   max_cooperative_grid_blocksz(FakeOverload.max_cooperative_grid_blocks/   s	     qr   N)__name__
__module____qualname____doc__r   r   r   r   r   r   +   s-             r   r   c                       e Zd Zd ZdS )FakeOverloadDictc                     t                      S N)r   )r   keys     r   __getitem__zFakeOverloadDict.__getitem__6   s     ~~r   N)r   r   r    r'   r   r   r   r#   r#   5   s#            r   r#   c                   l    e Zd ZdZdg dfdZd Zd Zd Zd Zdd	Z	e
d
             Ze
d             ZdS )FakeCUDAKernelz(
    Wraps a @cuda.jit-ed function.
    Fc                     || _         || _        || _        || _        t	          |          | _        d | _        d | _        d| _        d| _	        t          j        | |           d S )Nr   )fn_device	_fastmath_debuglist
extensionsgrid_dim	block_dimstreamdynshared_size	functoolsupdate_wrapper)r   r+   devicefastmathr0   debugs         r   __init__zFakeCUDAKernel.__init__A   se    !z**  r*****r   c           	      t   	
  j         rCt           j        t                                5    j        | cd d d            S # 1 swxY w Y   t	           j         j                  \  }}t          || j                  }t          |          5  g 

 fd		fd|D             }t           j        |          5  t          j        | D ],}t           j        || j                  } |j        |g|R   -	 d d d            n# 1 swxY w Y   
D ]} |             	 d d d            d S # 1 swxY w Y   d S )Nc                    t          j        fdj        d | f          \  }} t          | t          j                  r.| j        dk    r#t          |                                         }nWt          | t                    r|                               }n,t          | t          j
                  rt          |           }n| }t          |t                    rt          |          S |S )Nc                       |j         | ddS )Nr   )r3   retr)prepare_args)ty_val	extensionr>   s     r   <lambda>z;FakeCUDAKernel.__call__.<locals>.fake_arg.<locals>.<lambda>b   s%    .Di.D !/# /# /# r   r   )r5   reducer0   
isinstancenpndarrayndimr   	to_devicer   voidr   r   )arg_retr>   r   s      r   fake_argz)FakeCUDAKernel.__call__.<locals>.fake_arg_   s    ")# # # # O3K 3 c2:.. 38a<<"3--11$77CCW-- ----CCRW-- ',,CCCc=11 :4S999
r   c                 &    g | ]} |          S r   r   ).0rJ   rM   s     r   
<listcomp>z+FakeCUDAKernel.__call__.<locals>.<listcomp>v   s!    7773#777r   )r,   r	   r+   r   r   r1   r2   r   r4   r   rE   ndindexBlockManagerr.   run)r   argsr1   r2   fake_cuda_module	fake_args
grid_pointbmwbrM   r>   s   `        @@r   __call__zFakeCUDAKernel.__call__O   sX   < 	&$TW.A.C.CDD & &tw~& & & & & & & & & & & & & & & & :$-:>.J J) *(I*.*=? ?!"233 $	 $	 D     . 8777$777I$TW.>?? 3 3"$*h"7 3 3J%dgxDKPPBBF:2	2222233 3 3 3 3 3 3 3 3 3 3 3 3 3 3   G$	 $	 $	 $	 $	 $	 $	 $	 $	 $	 $	 $	 $	 $	 $	 $	 $	 $	sG   
AA
A,D-<<D9D-D			D-D		D--D14D1c                     t          |d d          \  | _        | _        t          |          dk    r|d         | _        | S )Nr
         )r   r1   r2   lenr4   )r   configurations     r   r'   zFakeCUDAKernel.__getitem__   sG    'rr):; 	&t~ }"""/"2Dr   c                     d S r%   r   r   s    r   bindzFakeCUDAKernel.bind   s    r   c                     | S r%   r   )r   rT   s     r   
specializezFakeCUDAKernel.specialize   s    r   r   c                 J    |dk     rt          d|z            | |d||f         S )Nr   z0Can't create ForAll with negative task count: %sr   )
ValueError)r   ntaskstpbr3   	sharedmems        r   forallzFakeCUDAKernel.forall   s<    A::O%& ' ' 'FAvy011r   c                     t                      S r%   )r#   ra   s    r   	overloadszFakeCUDAKernel.overloads   s    !!!r   c                     | j         S r%   )r+   ra   s    r   py_funczFakeCUDAKernel.py_func   s	    wr   N)r   r   r   )r   r   r    r!   r:   rZ   r'   rb   rd   rj   propertyrl   rn   r   r   r   r)   r)   <   s          -2b + + + +/ / /b      2 2 2 2 " " X"   X  r   r)   c                   J     e Zd ZdZ fdZ fdZd Zd Zd Zd Z	d Z
 xZS )	BlockThreadzG
    Manages the execution of a function for a single CUDA thread.
    c                    |rfd}|}n}t          t          |                               |           t          j                    | _        d| _        || _        t          | | _	        t          | | _
        d | _        d| _        d| _        || _        t          | j        j         }| j
        j        |j        | j
        j        |j        | j
        j        z  z   z  z   | _        d S )Nc                  B    t          j        d            | i | d S )Nraise)divide)rE   seterr)rT   kwargsfs     r   debug_wrapperz+BlockThread.__init__.<locals>.debug_wrapper   s1    	))))4"6"""""r   )targetFT)superrq   r:   	threadingEventsyncthreads_eventsyncthreads_blocked_managerr   blockIdx	threadIdx	exceptiondaemonabortr9   
_block_dimxyz	thread_id)
r   rx   managerr   r   r9   ry   rz   blockDim	__class__s
    `       r   r:   zBlockThread.__init__   s     	# # # # # #FFFk4  )))888!*!2!2#( hy)

12)XZ4>;K;C:;?>;K<L<L .M Nr   c                    	 t          t          |                                            d S # t          $ r}dt	          | j                  z  }dt	          | j                  z  }t          |          dk    r|d|}n
|d|d|}t          j	                    d         } t          |          |          |f| _        Y d }~d S d }~ww xY w)Nztid=%szctaid=%s  z: r
   )r{   rq   rS   	Exceptionr/   r   r   strsysexc_infotyper   )r   etidctaidmsgtbr   s         r   rS   zBlockThread.run   s    	0+t$$((***** 
	0 
	0 
	0T$.111Cdm!4!44E1vv||!$ee,%(SS%%%3"B &d1ggcllB/DNNNNNNN
	0s   ', 
CBCCc                     | j         rt          d          d| _        | j                                         | j                                         | j         rt          d          d S )Nz"abort flag set on syncthreads callTz#abort flag set on syncthreads clear)r   RuntimeErrorr   r~   waitclearra   s    r   syncthreadszBlockThread.syncthreads   st    : 	ECDDD#' ##%%%$$&&&: 	FDEEE	F 	Fr   c                     | j         j        | j         j        | j         j        f}|| j        j        |<   |                                  t          j        | j        j                  }|                                  |S r%   )	r   r   r   r   r   block_stater   rE   count_nonzero)r   valueidxcounts       r   syncthreads_countzBlockThread.syncthreads_count   sj    n 0$.2BB).!#& !:;;r   c                     | j         j        | j         j        | j         j        f}|| j        j        |<   |                                  t          j        | j        j                  }|                                  |rdndS Nr   r   )	r   r   r   r   r   r   r   rE   allr   r   r   tests       r   syncthreads_andzBlockThread.syncthreads_and   s    n 0$.2BB).!#&vdm/00qqar   c                     | j         j        | j         j        | j         j        f}|| j        j        |<   |                                  t          j        | j        j                  }|                                  |rdndS r   )	r   r   r   r   r   r   r   rE   anyr   s       r   syncthreads_orzBlockThread.syncthreads_or   r   r   c                 (    d| j         d| j        dS )Nz
Thread <<<z, z>>>)r   r   ra   s    r   __str__zBlockThread.__str__   s     (,t~~~FFr   )r   r   r    r!   r:   rS   r   r   r   r   r   __classcell__)r   s   @r   rq   rq      s         N N N N N00 0 0 0 0
F 
F 
F            G G G G G G Gr   rq   c                       e Zd ZdZd Zd ZdS )rR   a  
    Manages the execution of a thread block.

    When run() is called, all threads are started. Each thread executes until it
    hits syncthreads(), at which point it sets its own syncthreads_blocked to
    True so that the BlockManager knows it is blocked. It then waits on its
    syncthreads_event.

    The BlockManager polls threads to determine if they are blocked in
    syncthreads(). If it finds a blocked thread, it adds it to the set of
    blocked threads. When all threads are blocked, it unblocks all the threads.
    The thread are unblocked by setting their syncthreads_blocked back to False
    and setting their syncthreads_event.

    The polling continues until no threads are alive, when execution is
    complete.
    c                     || _         || _        || _        || _        t	          j        |t          j                  | _        d S )N)dtype)	_grid_dimr   _fr.   rE   zerosbool_r   )r   rx   r1   r2   r9   s        r   r:   zBlockManager.__init__  s=    !#8IRX>>>r   c                 \    t                      }t                      }t                      }t          j         j         D ]^} fd}t	          | || j                  }|                                 |                    |           |                    |           _|r|D ]}}|j        r|                    |           |j	        rW|D ])}	d|	_
        d|	_        |	j                                          *|j	        d                             |j	        d                   ~||k    r3|D ]"}d|_        |j                                          #t                      }t          d |D                       }||D ]4}|j	        r+|j	        d                             |j	        d                   5d S )Nc                       j            d S r%   )r   )rT   r   s   r   rz   z BlockManager.run.<locals>.target  s    r   TFr   r   c                 :    g | ]}|                                 |S r   )is_alive)rO   ts     r   rP   z$BlockManager.run.<locals>.<listcomp>/  s%    HHHa!**,,HHHHr   )setrE   rQ   r   rq   r.   startaddr   r   r   r~   with_traceback)
r   rW   rT   threadslivethreadsblockedthreadsblock_pointrz   r   t_others
   ` `       r   rS   zBlockManager.run  s   %%ee:t7 	 	K     FD*k4;OOAGGIIIKKNNNOOA
  	J  H H( H"&&q))))[ 	H $+ 8 8(,6;31557777+a.77AGGG	H n,,' . .A,1A)'++----!$HH;HHHIIK'  	J,  	D 	DA{ Dk!n33AKNCCCD	D 	Dr   N)r   r   r    r!   r:   rS   r   r   r   rR   rR      sA         "? ? ?(D (D (D (D (Dr   rR   )
contextlibr   r5   r   r|   numpyrE   cudadrv.devicearrayr   r   	kernelapir   r   r	   errorsr   rT   r   r   r   r   r   r   dictr#   objectr)   Threadrq   rR   r   r   r   <module>r      s   % % % % % %     



         I I I I I I I I @ @ @ @ @ @ @ @ @ @ 0 0 0 0 0 0 $ $ $ $ $ $ $ $
  
 
 
             t   _ _ _ _ _V _ _ _HPG PG PG PG PG)" PG PG PGfAD AD AD AD AD6 AD AD AD AD ADr   