o
    h@                     @   s  d dl mZ d dlmZmZ d dlmZ d dlmZm	Z	m
Z
mZmZmZmZmZ d dlZd dlmZ d dlm  mZ d dlmZ d dlZd dlmZ d dlm  mZ d dlZd dl Z d dl!Z!d dl"m#Z# de$d	ee$e$f fd
dZ%dee dej&de	e$e
f fddZ'dej&de	e$e
f dejj(fddZ)d6dejj(d	ejj(fddZ*dej(d	ej(fddZ+dej(deej& deej& deej& fddZ,ej-ej.ej/ej0ej1ej2ej3ej4ej5ej6ej4ej7ej8gZ9ej:ej;gZ<ej-ej=ej.ej>ej/d d! iZ?deej& de	e$ej(f fd"d#Z@deej& de	e$ej(f d$e	ej(ej(f fd%d&ZAG d'd( d(ZBd7d+d,ZCd-eBd	eDfd.d/ZEG d0d1 d1ZFdejGfdejj(d2ee	e$e
f  d3eejG d	ejj(fd4d5ZHdS )8    N)ArgumentTarget)fuse_conv_bn_eval)TypeDictAnyTupleIterableOptionalListcast)	ShapeProp)defaultdict)Enumtargetreturnc                 C   s*   |  dd^ }}|r|d |fS d|fS )zp
    Splits a qualname into parent path and last atom.
    For example, `foo.bar.baz` -> (`foo.bar`, `baz`)
    .   r    )rsplit)r   parentname r   X/var/www/html/ai/venv/lib/python3.10/site-packages/torch/fx/experimental/optimization.py_parent_name   s   r   patternnodemodulesc                 C   s   t |jdkr	dS |jd |f}t| |D ]2\}}t|tjs" dS |jdkr* dS t|jts3 dS |j|vr; dS t	||j |urG dS qdS )Nr   Fcall_moduleT)
lenargszip
isinstancefxNodeopr   strtype)r   r   r   nodesexpected_typecurrent_noder   r   r   matches_module_pattern   s    

r+   
new_modulec                 C   s<   t | jtsJ t| j\}}||| j< t|| || d S N)r"   r   r&   r   setattr)r   r   r,   parent_namer   r   r   r   replace_node_module,   s   
r0   Fmodelc                 C   s   t jt jft jt jft jt jfg}|st| } t	
| }t| }t|j}|D ]E}|jD ]?}t|||rot|jd jdkrCq0||jd j }||j }	|	jsTq0t||	}
t|jd ||
 ||jd  || q0q+t	||S )z
    Fuses convolution/BN layers for inference purposes. Will deepcopy your
    model by default, but can modify the model inplace as well.
    r   r   )nnConv1dBatchNorm1dConv2dBatchNorm2dConv3dBatchNorm3dcopydeepcopyr#   symbolic_tracedictnamed_modulesgraphr(   r+   r   r    usersr   track_running_statsr   r0   replace_all_uses_with
erase_nodeGraphModule)r1   inplacepatternsfx_modelr   	new_graphr   r   convbn
fused_convr   r   r   fuse2   s2   








rK   c                 C   s*   t | }G dd dtj j}|| S )z5
    Removes all dropout layers from the module.
    c                       s>   e Zd Zdedeedf deeef def fddZ	  Z
S )z&remove_dropout.<locals>.DropoutRemoverr   r    .kwargsr   c                    s:   t | j| tjrt|dksJ |d S t |||S )Nr   r   )r"   
submodulesr2   Dropoutr   superr   )selfr   r    rL   	__class__r   r   r   V   s   z2remove_dropout.<locals>.DropoutRemover.call_module)__name__
__module____qualname__r   r   r   r   r&   r   r   __classcell__r   r   rQ   r   DropoutRemoverU   s    6rW   )r#   r;   torchTransformer	transform)r1   rF   rW   r   r   r   remove_dropoutO   s   
r[   orig_moduler(   inputsoutputsc                    s|   t  }i  |D ]}||j}| |< q|D ]}|| fdd}| |< q| fdd|D  |  t | |S )z
    Given lists of nodes from an existing graph that represent a subgraph, returns a submodule that executes that subgraph.
    c                    s    |  S r-   r   )xenvr   r   <lambda>h       z"extract_subgraph.<locals>.<lambda>c                    s   g | ]} | qS r   r   ).0outputr`   r   r   
<listcomp>j       z$extract_subgraph.<locals>.<listcomp>)r#   Graphplaceholderr   	node_copyre   lintrC   )r\   r(   r]   r^   rG   inputnew_noder   r   r`   r   extract_subgraph^   s   

rn   c                 C   s
   t | S r-   )	th_mkldnnMkldnnBatchNorm)a_r   r   r   rb   {   s   
 rb   c                 C   s   i }| D ]9}|j dkr=t|jtsJ ||j }t|tv r=tt| |tj}t|tj	s0J t
|||< t||| q|S )z
    For each node, if it's a module that can be preconverted into MKLDNN,
    then we do so and create a mapping to allow us to convert from the MKLDNN
    version of the module to the original.
    r   )r%   r"   r   r&   r'   
mkldnn_maprX   floatr2   Moduler9   r:   r0   )r(   r   old_modulesr   
cur_moduler,   r   r   r   modules_to_mkldnn   s   

rx   rv   c                 C   sJ   | D ] }|j dkr"t|jtsJ ||j }||v r"t||||  qdS )za
    Maps each module that's been changed with `modules_to_mkldnn` back to its
    original.
    r   N)r%   r"   r   r&   r0   )r(   r   rv   r   rw   r   r   r   reset_modules   s   

ry   c                   @   s   e Zd ZdejfddZdS )MklSubgraphfx_graphc                 C   s   || _ g | _g | _g | _d S r-   )r{   r(   start_nodes	end_nodes)rP   r{   r   r   r   __init__   s   
zMklSubgraph.__init__N)rS   rT   rU   r#   rh   r~   r   r   r   r   rz      s    rz   
   r   c                    s*   dddt dtf fdd}|S )aW  
    This generates a heuristic that can be passed into `optimize_for_inference` that
    determines whether a subgraph should be run in MKL by running it with the example_inputs.

    Example usage:
        heuristic = gen_mkl_autotuner(example_inputs, iters=10)
        fast_model = optimization.optimize_for_inference(model, heuristic)
    Nr>   r   c                    s   | j }d u r| jj| jjt dd |D  tttj	 dd | j
D }t| j||fdd}| fdd}tjjt  | fdd}||k S )	Nc                 S   s   g | ]}t |jqS r   )rX   randnshaperd   r   r   r   r   rf      s    z@gen_mkl_autotuner.<locals>.use_mkl_heuristic.<locals>.<listcomp>c                 S   s   g | ]}|j d  qS )r   )r    r   r   r   r   rf      s    c                    s<   t D ]}|   qt }t  D ]}|  }qt | S r-   )rangetime)frr   beginout)iterswarmupr   r   	benchmark   s   z?gen_mkl_autotuner.<locals>.use_mkl_heuristic.<locals>.benchmarkc                      s   dd dd  D  D S )Nc                 S      g | ]}|  qS r   )to_denserd   ir   r   r   rf      rg   zRgen_mkl_autotuner.<locals>.use_mkl_heuristic.<locals>.<lambda>.<locals>.<listcomp>c                 S   r   r   )	to_mkldnnr   r   r   r   rf      rg   r   r   sample_inputs	submoduler   r   rb      s    z>gen_mkl_autotuner.<locals>.use_mkl_heuristic.<locals>.<lambda>c                      s     S r-   r   r   r   r   r   rb      rc   )r|   r{   owning_modulerv   r   	propagater   r   r#   r$   r}   rn   r(   ry   r>   r<   r=   )r>   input_nodesoutput_argsr   mkl_timeno_mkl_timeexample_inputsrF   r   rv   r   r   r   use_mkl_heuristic   s   z,gen_mkl_autotuner.<locals>.use_mkl_heuristic)rz   bool)r   r   r   r   r   r   r   gen_mkl_autotuner   s   	r   r>   c                 C   s   t | jdkS )z
    This is a heuristic that can be passed into `optimize_for_inference` that
    determines whether a subgraph should be run in MKL by checking if there
    are more than 2 nodes in it
       )r   r(   )r>   r   r   r   use_mkl_length   s   r   c                   @   sF   e Zd Zdd ZdefddZdedefddZd	ed
efddZdS )	UnionFindc                 C   s   d g| | _ dg| | _d S )Nr   r   size)rP   nr   r   r   r~      s   zUnionFind.__init__vc                 C   s   || j |< d| j|< d S )Nr   r   )rP   r   r   r   r   make_set   s   
zUnionFind.make_setr   c                 C   sB   | j | }||kr|S |d usJ | || j |< tt| j | S r-   )r   findr   int)rP   r   parr   r   r   r      s   
zUnionFind.findrq   bc                 C   sf   |  ||  |}}||kr|S | j| | j| k r ||}}|| j|< | j|  | j| 7  < d S r-   )r   r   r   )rP   rq   r   r   r   r   join   s   

zUnionFind.joinN)rS   rT   rU   r~   r   r   r   r   r   r   r   r   r      s
    r   pass_configtracerc              	      sD  dddt id}|du ri }|| |d rt| } |d r#t| } |d du r+| S t|d ts6td	d|d vr@td
|d d }| }|t	|  t
|j }t|  }G dd dt}t jD ]}	|j}
|	jdkr||	j }t|tv r|j}
t| d}|dur|jtjksJ d|jtdksJ dn|	jdkr|	jtv r|j}
n|	jtv r|j}
|
|jkr"|
|jkrtdd |	j D sqk !|	 t
"|	j  fdd}W d   n1 sw   Y  t#t$t
j%j& ||	_  '|	  (dd|	f}|	)| |	f|_ W d   n	1 sw   Y  qkt*t j|}| _+ jD ]B}	|	jdkrr|	jdkrr|	j d }t|	j,}|D ]}|jdkrc|jdkrc|)|  -| qKt.|	j,dkrr -|	 q1t. j}t/|fddt0 jD ]w\}}	|	jdkr|	jdkr||	_12| q|	jdkr|	jdkrĈ|	j d dusJ |	j d |	_3qfdd|	j4D }t.|dkrאqtdd |D rJ t5|}|d |	_6|dd D ]}7|d | qqt8 fd d} jD ]9}	t9|	d!r|:|	j6 j;|	 t9|	d"r1|:|	j1 j<;|	 t9|	d#rC|:|	j3 j=;|	 q|> D ](}||sp|j<|j= D ]}	|	j d }|	)|  -|	 qVt?|j|| qId} jD ]}	|	jdks|	jdkr|d7 }qwt@AtBCd$|   D  t
|  }|S )%a  
    Performs a set of optimization passes to optimize a model for the
    purposes of inference. Specifically, the passes that are run are:
    1. Conv/BN fusion
    2. Dropout removal
    3. MKL layout optimizations

    The third optimization takes a function `use_mkl_heuristic` that's used
    to determine whether a subgraph should be explicitly run in MKL layout.

    Note: As FX does not currently handle aliasing, this pass currently
    assumes nothing aliases. If that isn't true, use at your own risk.
    T	heuristic)conv_bn_fuser[   mkldnn_layout_optimizeNr   r[   r   Fz+mkldnn_layout_optimize config is not a dictz4Heuristic not found in mkldnn_layout_optimize configc                   @   s   e Zd ZdZdZdZdS )z*optimize_for_inference.<locals>.MklSupportr   r      N)rS   rT   rU   NOYESUNKNOWNr   r   r   r   
MklSupport  s    r   r   z)this pass is only for torch.float modulescpuz!this pass is only for CPU modulescall_functionc                 s   s    | ]}|j d kV  qdS )r   N)r   )rd   argr   r   r   	<genexpr>3  s    z)optimize_for_inference.<locals>.<genexpr>c                    s     d| fS )Nr   )call_methodr   r{   r   r   rb   6  s    z(optimize_for_inference.<locals>.<lambda>r   r   r   r   c                    s0   t | dr | jS t | dr | jS d S )Ncolorstart_color)hasattrr   r   r   r   )ufr   r   	get_colorS  s
   

z)optimize_for_inference.<locals>.get_colorc                    s,   g | ]}t |tjr |d ur |qS r-   )r"   r#   r$   r   )r   r   r   rf   n  s   , z*optimize_for_inference.<locals>.<listcomp>c                 s   s    | ]}|d u V  qd S r-   r   r   r   r   r   r   r  s    r   c                      s   t  S r-   )rz   r   r   r   r   rb   y  rc   r   r   	end_colorzmkldnn conversions: )Er   updaterK   r[   r"   r<   RuntimeErrortracer9   r:   r#   rC   rootr=   r   listr(   r   r%   r   r'   mkldnn_supportedr   next
parametersdtyperX   rt   devicemkldnn_supported_unknownr   anyr    inserting_beforemap_argr   r   r   r   inserting_aftercreate_noderA   rx   rv   r?   rB   r   r   	enumerater   r   r   all_input_nodessortedr   r   r   r   r   appendr|   r}   valuesry   logging	getLoggerrS   infork   )r1   r   r   default_pass_configr   
cur_tracerrF   r   r   r   supports_mkldnnrw   sample_parametermkldnn_argsdense_xrv   prv_noder?   user	num_nodescur_idx
cur_colorsother_colormkldnn_graphsr>   prvmkldnn_conversionsresultr   )r{   r   r   r   optimize_for_inference   s   
	




















r   )F)r   r   )Itorch.fxr#   torch.fx.noder   r   torch.nn.utils.fusionr   typingr   r   r   r   r	   r
   r   r   rX   torch.nnr2   torch.nn.functional
functionalFtorch.fx.passes.shape_propr   r9   collectionsr   torch.utils.mkldnnutilsmkldnnro   operatorr   r   enumr   r&   r   r$   r+   ru   r0   rK   r[   rn   r5   Linearr6   ReLU	MaxPool2d	AvgPool2dAdaptiveAvgPool2drelu	transposesigmoid
avg_pool2dadaptive_avg_pool2dr   addmulr   MkldnnConv2dMkldnnLinearrs   rx   ry   rz   r   r   r   r   Tracerr   r   r   r   r   <module>   s^    ($	$.	
"2
&