o
    h                     @   s   d dl Z d dlZd dlmZ d dlmZ d dlZd dlmZ d dl	m
Z
 d dlmZ d dlmZ d dlmZ d	d
lmZ d	dlmZ e eZdd ZG dd deZdd Zdd Zdd ZeeedZeded dddZdS )    N)defaultdict)Set)GraphModule)partition_cudagraphs)StorageWeakRef)Module)tree_map   )aot_autograd)register_backendc                 C   s   t | tjr
|  S | S N)
isinstancetorchTensorclone)t r   W/var/www/html/ai/venv/lib/python3.10/site-packages/torch/_dynamo/backends/cudagraphs.pycloner   s   r   c                       sJ   e Zd ZU eed< ee ed<  fddZdZdZ	dZ
dZdd Z  ZS )	CudaGraphModulegmmutated_inputsc                    s   t    || _|| _d S r   )super__init__r   r   )selfr   r   	__class__r   r   r      s   

zCudaGraphModule.__init__FNc                 G   st  | j d ur;t|t| jksJ t| j|D ]	\}}|| q| j   | jD ]}|| | j|  q(tt| j	S | j
rdd |D | _tj | _ tj | j  | j| j | _	W d    n1 sew   Y  | j   | jD ]}|| | j|  qrtt| j	S tj }|tj  tj| | j| }W d    n1 sw   Y  tj | d| _
|S )Nc                 S      g | ]}|  qS r   r   .0xr   r   r   
<listcomp><       z,CudaGraphModule.__call__.<locals>.<listcomp>T)graphlenstatic_inputszipcopy_replayr   r   r   static_outputs	warmed_upr   cuda	CUDAGraphr   Streamwait_streamcurrent_streamstream)r   argsdstsrcir1   rr   r   r   __call__,   s4   





zCudaGraphModule.__call__)__name__
__module____qualname__r   __annotations__r   intr   r+   r$   r&   r*   r7   __classcell__r   r   r   r   r      s   
 r   c                 C   s   dd }t t}d}t }| jD ]h}|jdkr*|t||j  | |d7 }q|jdkrx|jt	j
u r6q|jj}t|jD ]8\}}|t|jk rP|j| }	n|j|jvrWq?|j|j }	d}
|jrh|jjrhd}
|
rw||t||	j  O }q?q|S )	Nc                 S   s   d| v r| d S | d S )Nvalfake_resultr   )metar   r   r   meta_fkW   s   z%find_input_mutations.<locals>.meta_fkr   placeholderr	   call_functionFT)r   setnodesopr   r@   _typed_storageaddtargetoperatorgetitem_schema	enumerate	argumentsr%   r2   namekwargs
alias_infois_write)grA   inputs	input_idxr   nschemar5   argargumentmut_argr   r   r   find_input_mutationsV   s8   



r[   c                 C   s\   | j jD ]'}|jdkr+|jrJ | |j}| |j t|j }| |jt	|| qd S )Ncall_module)
r$   rE   rF   rP   get_submodulerI   delete_submoduler[   add_submoduler   )r   rV   submodr   r   r   r   apply_cuda_graphs{   s   


ra   c                 C   s   t | |} t|  | S r   )r   ra   )modelrT   r   r   r   
cudagraphs   s   
rc   )fw_compilerbw_compiler)rO   compiler_fnTc                    s  t |ttfs	J  rdd |D nt|tj  tj }|tj  tj	| | |  W d   n1 s>w   Y  |  tj | tj  tj
 tjj|d |  W d   n1 spw   Y  t ttfsf fdd}|S )zBThis isn't registered as a backend, but is used in some benchmarksc                 S   s   g | ]}t |qS r   )r   
zeros_liker   r   r   r   r"      s    z$cudagraphs_inner.<locals>.<listcomp>N)r1   c                     sT   t t | ks
J  rt| D ]	\}}|| q  r(dd D S S )Nc                 S   r   r   r   r   r   r   r   r"      r#   z1cudagraphs_inner.<locals>.run.<locals>.<listcomp>)r%   r'   r(   r)   )
new_inputsr3   r4   copy_inputscopy_outputsr$   r&   r*   r   r   run   s   zcudagraphs_inner.<locals>.run)r   listtupler   r,   synchronizer.   r/   r0   r1   r-   r$   )rb   rT   rk   rj   r1   rl   r   ri   r   cudagraphs_inner   s*   





rp   )TT)loggingrJ   collectionsr   typingr   r   torch.fxr   #torch.fx.passes.backends.cudagraphsr    torch.multiprocessing.reductionsr   torch.nnr   torch.utils._pytreer   commonr
   registryr   	getLoggerr8   logr   r   r[   ra   rc   aot_cudagraphsrp   r   r   r   r   <module>   s*    
<%