o
    hM                     @   s$  d dl mZmZ d dlmZmZmZmZmZm	Z	 d dl
Z
d dlmZ d dlZd dlZd dlm  mZ d dlZd dlZd dlZd dlZd dlmZ d dlmZ d dlmZ ddlmZmZ dd	l m!Z! d dl"Z"e!j#Z$d
d Z%dd Z&dd Z'dd Z(G dd dZ)e) Z*dd Z+dd Z,dd Z-dd Z.dd Z/dej0fdd Z1dej0fd!d"Z2dej0d#eej0ej0f fd$d%Z3d&d' Z4d(d) Z5d*ej6d#e7fd+d,Z8d-d. Z9e":dd/d0 Z;d1d2 Z<d3d4 Z=d5d6 Z>d7d8 Z?d9d: Z@	dHdej0d#eej0ej0f fd<d=ZAdId@e
jj0dAeBdBeBfdCdDZCdJdFdGZDdS )K    )is_sym_nodepy_sym_types)hint_intmagic_methodsmethod_to_operatorfree_symbolsis_symbol_binding_fx_nodefind_symbol_binding_fx_nodesNdefaultdict)graph_drawer)Tuple   )fx_graph_cseget_aten_target)configc                 C   s   | j ddS )N	recomputeF)metagetnode r   S/var/www/html/ai/venv/lib/python3.10/site-packages/torch/_functorch/partitioners.pymust_recompute   s   r   c                 C   s$   d}| j jD ]	}t|r dS qdS )NFT)graphnodesr   )fx_gfoundr   r   r   r   has_recomputable_ops   s   r   c                 C   s<   | j jD ]}t|rt|jdrtjj|jjv r dS qdS )NtagsTF)	r   r   r   hasattrtargettorchTagnondeterministic_seededr   )r   r   r   r   r   has_recomputable_rng_ops#   s
   $r%   c                 C   s6   t | jd tjtjfrdS t | jd tjsJ dS )Nvalr      )
isinstancer   r"   SymIntSymBoolSymFloatr   r   r   r   sym_node_size)   s   r,   c                   @   s   e Zd Zdd ZdS )InvalidNodeBasec                 C   s   dS )NzInvalid Noder   )selfr   r   r   __repr__0   s   zInvalidNodeBase.__repr__N)__name__
__module____qualname__r/   r   r   r   r   r-   /   s    r-   c           	         sr  t  }i  |D ]}||j}|j|_| |< q| jD ]X}||v r#q|jdkr-t |< q|jdkr\t	|j
|jfd } fdd|D }t|rOt |< q|| fdd |< q|jdkrn|| fd	d |< q|jd
krt	 qg }|D ]0}t|t jr| vrtd| dt | trJ d| d| |  qy|| qy|| |  |  |S )a  
    Given a graph, extracts out a subgraph that takes the specified nodes as
    inputs and returns the specified outputs.

    This includes specifying non-placeholder nodes as inputs.

    The general strategy is to initialize all inputs with proxies as we
    encounter them, and trace through the graph, only keeping values which take
    in valid proxies. Then, all dead code is eliminated.
    placeholdercall_functionr   c                    s&   g | ]}t |tjrt  | tqS r   )r(   fxNoder-   ).0xenvr   r   
<listcomp>S      & z6_extract_graph_with_inputs_outputs.<locals>.<listcomp>c                        |  S Nr   r8   r9   r   r   <lambda>W       z4_extract_graph_with_inputs_outputs.<locals>.<lambda>get_attrc                    r=   r>   r   r?   r9   r   r   r@   Y   rA   outputzNode z couldn't be found in envz was invalid, but is output)r5   Graphr3   namer   r   opInvalidNodepytreetree_flattenargskwargsany	node_copyr(   r6   RuntimeErrorr-   appendrC   eliminate_dead_codelint)	joint_graphinputsoutputs	new_graphr   new_nodeall_argsoutput_valuesr8   r   r9   r   "_extract_graph_with_inputs_outputs7   sF   







rY   c                 C   s(   | j dkod| jvot|  ot|  S Nr3   tangents)rF   r!   _is_bwd_seed_offset_is_fwd_seed_offsetr   r   r   r   
_is_primall   s   
r^   c                 C   s   | j dko	d| jv S rZ   rF   r!   r   r   r   r   _is_tangentt   s   r`   c                 C      | j dkod| jv pd| jv S )Nr3   bwd_seedbwd_base_offsetr_   r   r   r   r   r\   w      r\   c                 C   ra   )Nr3   fwd_seedfwd_base_offsetr_   r   r   r   r   r]   z   rd   r]   joint_modulec                C   s<   t dd | jjD d }|d | }||d  }||fS )Nc                 S   s   g | ]
}|j d kr|jqS rC   )rF   rJ   r7   r   r   r   r   r;          z,_extract_fwd_bwd_outputs.<locals>.<listcomp>r   )rH   rI   r   r   )rg   num_fwd_outputsrT   fwd_outputsbwd_outputsr   r   r   _extract_fwd_bwd_outputs~   s   rn   c                C   s  t | |d\}}ttt| jj}ttt| jj}ttt| jj}ttt| jj}	t	| j|| || | }
t	| j|| | |	 |}|jD ].}|j
dkrw|jsw|D ]}|j|jkrd||  nqU|D ]}|j|jkrv||  nqgqIt }g }g }|D ]}t|}|r|| || q|| qt| j}t|||D ],}d|jvrqt|jd | }t|dd dD ]}||vrq|||  q||O }q|  |||  t	| j|| || | }
t	| j|| | |	 |}t| |
}t| |}||fS )Nrk   r3   r&   c                 S   s   | j S r>   rE   )sr   r   r   r@      s    z*_extract_fwd_bwd_modules.<locals>.<lambda>key)rn   listfilterr^   r   r   r`   r]   r\   rY   rF   usersrE   removesetr   addrO   r	   	itertoolschainr   r   sortedclearextendr5   GraphModule)rg   saved_valuessaved_sym_nodesrk   rl   rm   primal_inputstangent_inputsfwd_seed_offset_inputsbwd_seed_offset_inputs	fwd_graph	bwd_graphr   saved_value	saved_symsaved_symbolssaved_sym_nodes_bindingsaved_sym_nodes_derivedsymbolsymbol_bindingsnew_symbolsrq   
fwd_module
bwd_moduler   r   r   _extract_fwd_bwd_modules   s|   








r   returnc                   sx  t | rt| ||dS ttt| jj}ttt| jj}|| }t| |d\}}t	| j||}dd |jD  g }	g }
| jjD ]]}|j
 vrHq@t|rR|
| q@d|jvru|jdkru|j}tdd |D sjJ |D ]}|	| qlq@ fdd	|jD }d|jv rtd
d |D r|D ]}|
| qq@|	| q@tdd |	D  }	tdd |
D  }
t| |	|
|dS )a  
    Partitions the :attr:`joint_module` in a manner that closely resembles the
    behavior observed in the original ``.forward()`` and ``.backward()`` of the
    callable, i.e., the resulting forward graph contains those operators that
    are executed in the original ``.forward()`` callable passed to
    :func:`aot_function`.

    The default partitioner collects the operators that are between the forward
    inputs and the forward outputs. This helps in finding the tensors which have
    to be stashed for the backward pass. These stashed tensors become the output
    of the generated forward graph. The remaining operators are then placed in
    the backward graph.

    .. warning::
        This API is experimental and likely to change.

    Args:
        joint_module(fx.GraphModule): The joint forward and backward graph. This
            is the result of AOT Autograd tracing.

    Returns:
        Returns the generated forward and backward Fx graph modules.
    ro   c                 S   s   h | ]
}|j d kr|jqS rh   rF   rE   ri   r   r   r   	<setcomp>  rj   z$default_partition.<locals>.<setcomp>tensor_metar4   c                 s   s    | ]	}|j tjkV  qd S r>   )r!   operatorgetitemr7   userr   r   r   	<genexpr>      z$default_partition.<locals>.<genexpr>c                    s   g | ]	}|j  vr|qS r   rp   r7   nforward_node_namesr   r   r;         z%default_partition.<locals>.<listcomp>c                 s   s    | ]}t |V  qd S r>   r   r   r   r   r   r         c                 S      i | ]}|d qS r>   r   r7   kr   r   r   
<dictcomp>&      z%default_partition.<locals>.<dictcomp>c                 S   r   r>   r   r   r   r   r   r   '  r   r   rk   )r   #min_cut_rematerialization_partitionrt   ru   r^   r   r   r]   rn   rY   rE   r   rO   r   rF   rv   allkeysr   )rg   _joint_inputsrk   r   r   rS   rl   rm   forward_only_graphr   r   r   rv   r   backward_usagesr   r   r   default_partition   s>   


	r   c                 C   s   d}| D ]}||9 }q|S Nr   r   )r8   rq   ir   r   r   _prod,  s   
r   c                 C   s
   | |j  S r>   )itemsize)numeldtyper   r   r   _tensor_nbytes2  s   
r   r   c                 C   s   d| j v rB| j d }t|trt|tjrdS dS t|ttfr)tdd |D S t|tjr9t	t
| |jS tdt| d| j v rX| j d }ttt|j}|j}ndS t	||S )	Nr&   r   i?B c                 s   s0    | ]}t |tjrtt| |jV  qd S r>   )r(   r"   Tensorr   r   r   r   r   r   r   r   r   >  s   . z_size_of.<locals>.<genexpr>zUnknown metadata type r   r   )r   r(   r   r"   r)   rt   tuplesumr   r   r   r   r   rN   typer   mapto_size_hintshape)r   r&   metadatar   r   r   r   r   _size_of5  s"   





r   c                 C   s\   ddl m} |t}| jD ]}|jdkr||jj  d7  < qtt|	 dd dd d S )	Nr   r
   r4   r   c                 S      | d S r   r   r?   r   r   r   r@   V  rA   z_count_ops.<locals>.<lambda>Trs   reverse)
collectionsr   intr   rF   r!   r0   printr|   items)r   r   cntr   r   r   r   
_count_opsP  s   

r   c                  C   sl   g } t tjjD ]+}ttjj|}t|tjjsq| D ]}t||}tj	j
|jv r2| |  nqq| S r>   )dirr"   opsatengetattrr(   _opsOpOverloadPacket	overloadsr#   	pointwiser   rO   )r   	attr_nameopoverloadpacketoverloadop_overloadr   r   r   pointwise_opsY  s   

r   c                    s   |  v r |  S | j dkrd | <  |  S | j dkr3| jd }|D ]}t|tjjjr0t|  q!d S  fdd| jD }t	|dkrFdg}t
|d  | <  |  S )Nr3   r   rC   c                    s&   g | ]}t |tjjjrt| qS r   )r(   r"   r5   r   r6   	get_depthr7   arg	depth_mapr   r   r;   |  r<   zget_depth.<locals>.<listcomp>r   )rF   rJ   r(   r"   r5   r   r6   r   all_input_nodeslenmax)r   r   rJ   r   
arg_depthsr   r   r   r   j  s"   



r   c                    s(    fdd| D }t | dd ddS )Nc                    s&   i | ]}t |tjjjr| | qS r   )r(   r"   r5   r   r6   r   r   r   r   r     r<   zsort_depths.<locals>.<dictcomp>c                 S   r   r   r   r?   r   r   r   r@     rA   zsort_depths.<locals>.<lambda>Tr   )r|   r   )rJ   r   r   r   r   r   sort_depths  s   r   c                    s&  t  i | jjD ]}|jdkr|j}|j|_||< q
i }t| jjD ]\}}|||< q(i  dd | jjD d }t	|   fddt
tt| jj}d}tj}|D ]}	|	jD ]}
||
 |k ro||
 }|
}qaq\|duswJ t
| jj|| d D ]}| qtj | }|S )a  
    This pass finds the first bwd node in the graph (by looking at users of
    tangents) and then reorders the graph by walking from this node to all the
    way to the end of the graph. At each op in this traveral, we insert this op
    in a new graph and try to bring only the relevant subgraph from the other
    non-bwd edges relevant for this op. This closely mimics the behavior of
    autograd engine.

    Why is this pass required in the first place?

    This is an artifact of how partitioners work today. The starting point of
    partitioner is a joint graph, which is fwd and then bwd graph. In the case
    of checkpointing, we keep portions of fwd graph in their original place in
    the joint graph, while obtaining a bwd graph. As a result, the resulting bwd
    graph has copies of recomputed fwd subgraphs followed by the original bwd
    graph. If we run this naively, this leads to bad memory footprint, because
    the fwd subgraphs are live for way longer duration than necessary. This pass
    reorders the operations such that we prioritize the ops for the original bwd
    graph while only realizing those ops from the fwd graph that are necessary
    at any given point in the graph.
    r3   c                 S      g | ]	}|j d kr|qS rh   rF   ri   r   r   r   r;     r   z7reordering_to_mimic_autograd_engine.<locals>.<listcomp>r   c                    sR   | v r|  S t | j D ]
\}}||< q| fdd| < |  S )Nc                    r=   r>   r   r?   r9   r   r   r@     rA   zSreordering_to_mimic_autograd_engine.<locals>.insert_node_in_graph.<locals>.<lambda>)r   r   rM   )r   r   _depthsr:   insert_node_in_graphrU   r   r   r     s   zAreordering_to_mimic_autograd_engine.<locals>.insert_node_in_graphN)r5   rD   r   r   rF   r3   rE   r   	enumerater   rt   ru   r`   mathinfrv   r"   r   )gmr   rV   orderidxoutput_noder   first_node_in_bwdminimum_ordertangentr   new_gmr   r   r   #reordering_to_mimic_autograd_engine  s<   




r   c               	   C   s  t  }dd }dd }dd }|| }||}	||}
t }| jjD ]*}t|rMt|jdrMtj	j
|jjv rM||j }|	|j }|
|j }||d||< q#tjjj}tjjj}|jjD ]}|jd	krld
|jv rl|} nq\g }| D ]\}}|d }|d }|j}||? |jd||jg|jR |jd}|jdtj|dfi d}|jdtj|dfi d}|| || || W d    n1 sw   Y  |j}|| dt| }||}||||jd< W d    n1 sw   Y  ||# |jd|||jg|jR |jd}|| || W d    n	1 s'w   Y  qsdd |jjD d }|jd }t|| }|d | | ||d   }|j | |j| |!  |!  ||fS )Nc                 S   sF   i }| j jD ]}|jdkr t|jdr tjj|jjv r |||j	< q|S )Nr4   r   )
r   r   rF   r    r!   r"   r#   r$   r   rE   )gmodrandom_nodesr   r   r   r   get_rng_ops  s   


z*functionalize_rng_ops.<locals>.get_rng_opsc                 S   sT   d| j vrdS | j d }t|ts|f}|D ]}t|tjr'|jjdkr' dS qdS )zV
        Check the example value of the node outputs to find the device type.
        r&   Ncudacpu)r   r(   r   r"   r   devicer   )r   
candidates	candidater   r   r   
get_device  s   


z)functionalize_rng_ops.<locals>.get_devicec                 S   s   | dkr	t j S t  S )Nr   )r"   r   get_rng_state)r   r   r   r   get_sample_rng_state  s   
z3functionalize_rng_ops.<locals>.get_sample_rng_stater   )fwdbwdr3   r   r   r   r4   )rJ   rK   r   r   rng_state_output_r&   c                 S   r   rh   r   ri   r   r   r   r;   O  r   z)functionalize_rng_ops.<locals>.<listcomp>)"rz   countdictr   r   r   r    r!   r"   r#   r$   r   rE   _prims	rng_primsrun_and_save_rng_staterun_with_rng_staterF   r   inserting_beforecreate_noderJ   rK   r   r   replace_all_uses_with
erase_noderO   nextr3   r   r   rC   	recompile) rg   	fw_module	bw_modulenum_sym_nodesuidr   r   r   joint_graph_rng_opsfw_graph_rng_opsbw_graph_rng_opsrecomputable_rng_ops_mapr   	base_nodefw_nodebw_noderun_and_save_rngr  bw_tangent_start_nodefw_rng_state_outputs	node_pairfw_graphfunctional_fw_nodestate
rng_outputbw_graph
state_namebw_rng_state_nodefw_output_node
fw_outputssym_node_start_idxrT   r   r   r   functionalize_rng_ops  s   










r&  c                 C   sL   | j jD ]}t|r#|jD ]}t|r"|jd |jd kr"d|jd< qq| S )a  
    If there are two consecutive checkpointed blocks with no operator in
    between, we would still want to stash the tensor at the boundary of
    checkpointed blocks. The following pass makes the last output node
    non-recomputable to allow for that.
    r   r   )r   r   r   rv   r   )rg   r   r   r   r   r   cleanup_recompute_tagsZ  s   

r'  inductorc          ,         s  zddl }W n ty } ztd|d}~ww | j  |   | j}tjr.t|}|| _| j}	t	| t
| }
r?t| } i | jjD ]}||j< qEfdd}|| \}}}t|dkrit| |dS t| jjD ]}|vryd|_qotd|_|jD ]}t|j|jd |_qqotjjtjj}g jjjjjjjjj j!j"j#j$j%j&j'j(j)j*j+j,j-j.j/j0j1j2j3j4j5j6j7j8j9j:j;j<j=j>j?j@jAjBjCjDjEjFjGjHjIjJjKjLjMjNjOjPjQjRjSjTjUjVjWjXjYjZj[j\j]t^j_j`jajbjc}j`jajdgd	kr|g |j|jejfjcjg|jh|jWjhji|jjjkjljmjnjojpjqjrjsjtjujvjwjxjyjzj{j|j}j~jj7 }jnjpjlj|jjjoj}g7 |jg7 }|7 }|t 7 }|jg7 }|d
d tD 7 }dur$tnt|jjjg}jjjjjjjjjjjjjg}|| t|B trsdd | jjD }|dd D  }td| t  d fdd fdd}fddfdddtffdd}| |	jD ]}|jdkrq||v rj|jd dtjd qt|st|r؈jd |jd tjd ||r|v rjd |jd tjd d!|jvrd"|jvp	d!|jv o	t|jd! tj }t|rt|}n|rtj}n||}j|jd |jd# |d |jD ]}j|jd# |jd tjd q0qz|d d\}}W n tyi   td$ td%|jj  w |\}
t }fd&d'|D D ]\}|
fd(d'|D  qzt } |D ]\}!}"|!dd) |"dd* ksJ |!dd) }#| |# qd+d, t| jjD 	tfd-d'| D 	fd.d/d0}$ttd1d/ |$}%ttd2d/ |$}$t| |$|%d3\}&}'r|
rt| |&|'t|%\}&}'t|'}'trltd4tWd5d |$D d  d6d |&jjD }(d7d |'jjD })|(|)@ }*tt}+|&jjD ]}|j|*v rIt|jd8rI|+t|jj  d7  < q.td9t|* d:t|( d:t|)  td;t|+ d<d/ d=d> |&|'fS )?ay  
    Partitions the joint graph such that the backward recomputes the forward.
    Recomputing helps in trading off memory bandwidth with computation.

    To create the fwd and bwd graph, we copy the joint graph, manually set the
    outputs to just original forward or backward outputs. And then we run the
    resulting graphs through dead code elimintation.

    .. warning::
        This API is experimental and likely to change.

    Args:
        joint_module(fx.GraphModule): The joint forward and backward graph. This
            is the result of AOT Autograd tracing.
        _joint_inputs: The inputs to the joint graph. This is unused.
        compiler: This option determines the default set of recomputable ops.
            Currently, there are two options: ``nvfuser`` and ``inductor``.
        recomputable_ops: This is an optional set of recomputable ops. If this
            is not None, then this set of ops will be used instead of the
            default set of ops.
        num_fwd_outputs: The number of outputs from the forward graph.

    Returns:
        Returns the generated forward and backward Fx graph modules.
    r   NzANeed networkx installed to perform smart recomputation heuristicsc           
         s   t   | jjD ] }|jdkrd|jv r | | v r'|jD ]} | qqttt	| jj}ttt
| jj}|| }t| d\}} dd |D  t| j||}fdd|jD  fdd| jjD }	| |	fS )	Nr3   r[   ro   c                 s   s    | ]	}|d ur|V  qd S r>   r   )r7   or   r   r   r     r   zNmin_cut_rematerialization_partition.<locals>.classify_nodes.<locals>.<genexpr>c                    s    h | ]}|j d kr |j qS rh   r   ri   name_to_noder   r   r     s    
zNmin_cut_rematerialization_partition.<locals>.classify_nodes.<locals>.<setcomp>c                    s    h | ]}|vr| vr|qS r   r   ri   required_bw_nodesrequired_fw_nodesr   r   r     s    )rx   r   r   rF   r!   ry   rv   rt   ru   r^   r]   rn   updaterY   )
rg   r   r   r   r   rS   rl   rm   r   unclaimed_nodes)r+  rk   r,  r   classify_nodes  s"   

z;min_cut_rematerialization_partition.<locals>.classify_nodesro   g    eAr   r(  c                 S      g | ]}t |qS r   )r   )r7   mr   r   r   r;     s    z7min_cut_rematerialization_partition.<locals>.<listcomp>c                 S   s.   h | ]}|j d krt|jdrt|jjqS )r4   _overloadpacket)rF   r    r!   strr4  ri   r   r   r   r     s
    
z6min_cut_rematerialization_partition.<locals>.<setcomp>c                 S   s   h | ]}t |qS r   )r5  r7   r   r   r   r   r         z#Ops banned from rematerialization: Fc                    sl   | h}t |dkr4| }|jD ]}|vr ||s dS |vr-t|v r-|| qt |dks	dS )Nr   TF)r   poprv   r   ry   )r   	cur_nodescurr   )
is_fusibler.  view_opsr   r   is_materialized_backwards  s   

zFmin_cut_rematerialization_partition.<locals>.is_materialized_backwardsc                    s   d| j v r| j d dkS  r| jdkot| v S | jdkr dS t| vr(dS | jtjkr0dS | jjjjjfv r=dS | rCdS sQdkrQ| j	t
jkrQdS tdd | jD }t| }|d	 |k S )
Nr   r   r4   FTr(  c                 s   s$    | ]}t |tjrt|V  qd S r>   )r(   r5   r6   r   r6  r   r   r   r      s   " zQmin_cut_rematerialization_partition.<locals>.ban_recomputation.<locals>.<genexpr>r'   )r   rF   r   r!   r   r   lift_fresh_copydefault
lift_freshdist_from_bwr   max_dist_from_bwr   rJ   r   )r   input_tensors_sizeoutput_size)AGGRESSIVE_RECOMPUTATIONr   compilergraph_has_recomputable_opsr=  recomputable_opsunrecomputable_opsr   r   ban_recomputation  s(   

	z>min_cut_rematerialization_partition.<locals>.ban_recomputationc                    s   t |  v ot | v S r>   )r   )ab)fusible_opsr   r   r;  $  s   z7min_cut_rematerialization_partition.<locals>.is_fusiblec                    s*    j dkrdS t fdd jD  S )Nr3   Tc                 3   s    | ]} |V  qd S r>   r   r   )r;  r   r   r   r   +  s    zOmin_cut_rematerialization_partition.<locals>.is_materialized.<locals>.<genexpr>)rF   r   rv   r   )r;  r   r   is_materialized'  s   
z<min_cut_rematerialization_partition.<locals>.is_materializedr   c                    s:   t | }t|dtt| jdd  } | r|S |d S )Ng?d   r      )r   r   r   minrA  )r   mem_sz)rN  r   r   get_node_weight-  s
   z<min_cut_rematerialization_partition.<locals>.get_node_weightrC   _insink)capacitysourcer&   r   _outz-Failed to compute min-cut on following graph:
c                 3   s    | ]	}| | fV  qd S r>   r   r   )nx_graphr   r   r   e  r   z6min_cut_rematerialization_partition.<locals>.<genexpr>c                 3   s     | ]}| v r|fV  qd S r>   r   )r7   v)non_reachableur   r   r   f  s    c                 S   s   i | ]\}}||qS r   r   )r7   r   r   r   r   r   r   o  s    z7min_cut_rematerialization_partition.<locals>.<dictcomp>c                 3   s    | ]} | V  qd S r>   r   ri   r*  r   r   r   p  r   c                    r=   r>   r   r?   )node_idxr   r   r@   p  rA   z5min_cut_rematerialization_partition.<locals>.<lambda>rr   c                 S   s   t | S r>   r   r   r   r   r   r@   r  rA   c                 S   s
   t |  S r>   r   ra  r   r   r   r@   s  s   
 r   z Theoretical Activations Stored: c                 S   r2  r   )r   r6  r   r   r   r;     r7  c                 S      h | ]
}|j d kr|jqS r4   r   ri   r   r   r   r     rj   c                 S   rb  rc  r   ri   r   r   r   r     rj   r4  z# remat/fw/bw: /zCount of Ops Rematerialized: c                 S   r   r   r   r?   r   r   r   r@     rA   Tr   )networkxImportErrorrN   r   rP   r  r   cser   r   r%   r'  r   rE   r   r   reversedrA  r   rv   rQ  r"   r   r   primsry   subdivatan2mulr   pow	remainderfmod__and____or____xor__
__lshift__
__rshift__eqnegegtleltabsbitwise_notceilfloorfracnegreluroundsilutruncloglog10log1plog2lgammaexpexpm1erferfccosacoscoshsinasinsinhtanatantanhatanhsqrtrsqrt
reciprocalsigmoidsoftplus	thresholdthreshold_backwardclampwherelerpaddcmulgelugelu_backwardr   mean_grad_sum_to_sizesum_to_sizeamaxtotype_asr   r   squeeze	unsqueezersub_to_copyaliasconvert_element_typeclone	full_likevarstdbroadcast_in_dimselectpermute_unsafe_viewviewexpandslicereshapebroadcast_tensorsscalar_tensorones	new_zerosr>  arangetriuvar_meanisinfrL   full
as_stridedzerosargmaxmaximumtindexr   
zeros_liker   rx   native_dropout	rand_like
randn_likemmconvolutionconvolution_backwardbmmaddmmupsample_bilinear2d_softmax_softmax_backward_datanative_layer_normnative_layer_norm_backwardnative_batch_normnative_batch_norm_backward_native_batch_norm_legitAOT_PARTITIONER_DEBUGr   DiGraphrF   add_edger   r   r^   r]   r   r(   r   r   r,   minimum_cut	Exceptionjoin	readwriteedgelistgenerate_edgelistr/  r   r|   rt   ru   r   r&  r   r   r    r!   r5  r4  r   ),rg   r   rF  rH  rk   nxer   	cse_graphfull_bw_graphgraph_has_recomputable_rng_opsr   r1  orig_fw_outputsr-  r0  r   ri  default_recomputable_ops
random_opscompute_intensive_opsjoint_module_opsops_ignoredrJ  rS  is_non_tensor_nodeweight	cut_value	partition	reachablecutsetnbrs	cut_nodesnode_innode_out	node_namer   r   r  r  fw_module_nodesbw_module_nodesremat_nodescountsr   )rE  r   rF  rM  rG  r;  rN  r=  r+  r`  r\  rk   rZ  rH  r.  r]  rI  r<  r   r   i  s
  


 
$
8
'




""
&r   fx_graphTtracedfnamefignamec           
      C   s   |rt | j}t| |} | jjD ]}i |_qtj	|\}}|s$d}t
d| |  t| |}| }	t|	d|d | |  d S )Nz.svgzWriting FX graph to file: write_.)copydeepcopyr   r5   r   r   r   ospathsplitextr   r   FxGraphDrawerget_main_dot_graphr   lstrip)
r  r  r  
clear_metarU   r   baseextgr8   r   r   r   
draw_graph  s   $r  full_graph.pngc                 C   s   t | | t| |S r>   )r  r   )r   joint_inputs	file_namer   r   r   draw_joint_graph  s   

r  )r(  N)r  T)r  )E"torch.fx.experimental.proxy_tensorr   r   %torch.fx.experimental.symbolic_shapesr   r   r   r   r   r	   r"   torch.fxr5   r   r   torch.utils._pytreeutils_pytreerH   r  r	  rz   sympyr   r   torch.fx.passesr   typingr   compile_utilsr   r    r   	functoolsdebug_partitionerr  r   r   r%   r,   r-   rG   rY   r^   r`   r\   r]   r   rn   r   r   r   r   r6   r   r   r   	lru_cacher   r   r   r   r&  r'  r   r5  r  r  r   r   r   r   <module>   st     5]
J	
K 
  '