o
    hvx                     @   s  d dl Z d dlZd dlZd dlZd dlZd dlZd dlZd dlZd dlZd dl	Z	d dl
Z
d dlZd dlZd dlZd dlZd dlmZ d dlmZmZmZmZmZmZmZmZmZmZmZ d dlmZ d dlZd dlZd dl m!Z!m"Z" d dl#m$Z$m%Z%m&Z& ddl'm(Z( dd	l)m*Z*m+Z+ e,e-Z.ed
Z/eej0ej0f Z1dd Z2e3dde4fddZ5e3dde4fddZ6dd Z7deeej8 e9f dej8fddZ:dd Z;dd Z<dee/ dee/ fddZ=de>de>de>fd d!Z?d"e>de>fd#d$Z@d%eee>ejAf  deej0 fd&d'ZBd%eee>ej0f  deee>ejAf  fd(d)ZCd*d+ ZDd,d- ZEdd.ed/ef d0e>deFfd1d2ZGdd6d7ZHd8d9 e!_Id:d9 e"_Id;ed<e9fd=d>ZJd;ed?ee9 fd@dAZKde>fdBdCZLdDdE ZMdFdG ZNdHdI ZOdJdK ZPdLdM ZQ	ddNeejRjS deejRjS fdOdPZTdQdR ZUdSej0de9fdTdUZVdVe9dejWfdWdXZXdSej0dYeeef dej0fdZd[ZYd\ej0d]e9fd^d_ZZd\ej0d`e9fdadbZ[dcdd Z\e j]deg dfe^ e^ e^ e^ gdgZ_ej`ddhdiZadee> fdjdkZbe3dldmdn ZcG dodp dpeZdG dqdr drZeG dsdt dtZfe3ddudv ZgdwdxdydzZhd{d| ZiG d}d~ d~Zjdd Zkdd Zlej`dd ZmdddZndd Zod ddejpde>de>fddZqdddZrdd Zsdd Ztdd Zudd Zvdej0dejwfddZxej`dd Zydd ZzdZ{zd dl|Z|W n e}y   dwZ{Y nw dd Z~dd Zdd Zdd Zdd Ze3ddd Zdd Zdd Zdd Zdd Zdd Zdd Zdd Zdd ZdS )    N)StringIO)AnyCallableDictIterableList
NamedTupleOptionalSetTypeVarUnion
ValuesView)mock)immutable_dictimmutable_list)CleanDivFloorDivModularIndexing   )config)current_deviceget_device_capability_Tc                  O   s>   t d dd }| \}}||vrd||< || i |d S )Nc                  S   sL   zddl m}  W n ty   tdw | t| jdd ur#dfS dfS )Nr   )do_benchzrequires Triton	quantilespercentiles)triton.testingr   ImportErrorNotImplementedErrorinspect	signature
parametersget)triton_do_bench r$   K/var/www/html/ai/venv/lib/python3.10/site-packages/torch/_inductor/utils.pyload_triton0   s   
zdo_bench.<locals>.load_triton)g      ?g?g?r   )	functools	lru_cache)argskwargsr&   r#   quantile_field_namer$   r$   r%   r   /   s   

r   returnc                  C   s@   t j sdS zdd l} | d uot dkW S  ty   Y dS w )NFr   )   r   )torchcudais_availabletritonr   r   )r1   r$   r$   r%   
has_tritonN   s   
r2   c                  C   s@   zddl m}  | d uotttjdd dW S  ty   Y dS w )Nr   	roi_aligntorchvisionr4   F)torchvision.opsr4   hasattrgetattrr.   opsr   r3   r$   r$   r%   has_torchvision_roi_alignZ   s   
r:   c                  G   s   t tjdd | D S )Nc                 S   s   g | ]}|r|qS r$   r$   .0xr$   r$   r%   
<listcomp>g   s    z'conditional_product.<locals>.<listcomp>)r'   reduceoperatormul)r)   r$   r$   r%   conditional_productf      rB   devicec                 C   sP   | d u r
t djS t| trt | } | jdkr&| jd u r&t jdt dS | S )Ng        r/   )index)r.   tensorrD   
isinstancestrtyperE   r   )rD   r$   r$   r%   decode_devicej   s   

rJ   c                 C   s   t tj| tdS )Nr   )r'   r?   r@   rA   sympyIntegeritr$   r$   r%   sympy_productt   s   rO   c                 C   s2   t | t |ks
J ttdd t| |D S )Nc                 s   s    | ]	\}}|| V  qd S Nr$   )r<   abr$   r$   r%   	<genexpr>z       zsympy_dot.<locals>.<genexpr>)lenrK   expandsumzip)seq1seq2r$   r$   r%   	sympy_dotx   s   r[   rN   c                 C   s   dd | D   S )Nc                 S   s   i | ]}t ||qS r$   )idr;   r$   r$   r%   
<dictcomp>~   s    zunique.<locals>.<dictcomp>)valuesrM   r$   r$   r%   unique}      r_   numerdenomc              	   C   sF   t | tr
t |tsJ |  dt|  d| dt| | |   S )Nz: , )rG   intrI   )ra   rb   r$   r$   r%   ceildiv   s    re   nc                 C   s`   | dksJ d| d8 } | | d? O } | | d? O } | | d? O } | | d? O } | | d? O } | d7 } | S )z9Return the smallest power of 2 greater than or equal to nl        z32-bit onlyr               r$   rf   r$   r$   r%   next_power_of_2   s   rl   lstc                 C   s   dd | D S )z
    Gets the shape and stride of a tensor. For non-symbolic tensors, this is
    trivial. But for symbolic tensors, we need to map from SymIntNode into
    sympy.Expr.
    c                 S   s*   g | ]}t |tjr|jjnt|qS r$   )rG   r.   SymIntnodeexprrK   rL   r<   ir$   r$   r%   r>      s    z-convert_shape_to_inductor.<locals>.<listcomp>r$   rm   r$   r$   r%   convert_shape_to_inductor   s   rt   c                    s   ddl m   fdd| D S )zz
    Takes a list of shapes from Inductor and converts them into symints (or just
    ints if all shapes are static).
    r   Vc                    sB   g | ]}t |tr|nt |tjrt|n	 jjjj|d dqS )N)hint)rG   rd   rK   rL   graphsizevars	shape_envcreate_symintnoderq   ru   r$   r%   r>      s    

z+convert_shape_to_symint.<locals>.<listcomp>)virtualizedrv   rs   r$   ru   r%   convert_shape_to_symint   s   
r}   c           
      C   s   t j }g }g }t|D ] \}}t|t jr(||d|  || q|| qtdd |	 D s;J |
| t||}t| jjdkrZt| jjd jdkrZ|f}|| t ji |}	|	|fS )Nargc                 s   s    | ]
}t |tj V  qd S rP   )rG   r.   Tensorr;   r$   r$   r%   rS      s    z$gen_gm_and_inputs.<locals>.<genexpr>r   r   r   )r.   fxGraph	enumeraterG   r   appendplaceholderallr^   call_functiontuplerU   _schemareturnsrH   rI   outputGraphModule)
targetr)   r*   gg_argsa_argsrf   r~   ro   gmr$   r$   r%   gen_gm_and_inputs   s    

r   c                   C   s   t j rt j  d S d S rP   )r.   r/   r0   synchronizer$   r$   r$   r%   r      s   
r   model.timesc                 C   sP   t   td t }t|D ]	}| | }t   qt }|d us$J || S )Ni9  )r   r.   manual_seedtimeperf_counterrange)r   example_inputsr   t0_resultt1r$   r$   r%   timed   s   
r   r$   
         ?c                    s>   t  fddt|D }t |}t|| d |S )Nc                    s   g | ]}t  qS r$   )r   )r<   r   r)   fnr   r$   r%   r>      s    z%print_performance.<locals>.<listcomp>z.6f)r.   rF   r   medianprint)r   r)   r   repeatbaselinetimingstookr$   r   r%   print_performance   s    
r   c                 C   s   t t|  S rP   )hashr   itemsselfr$   r$   r%   <lambda>   s    r   c                 C   s   t t| S rP   )r   r   r   r$   r$   r%   r      s    objmethodc                    s$   t | |  t| | fdd dS )zKReplace obj.method() with a new method that returns a precomputed constant.c                      s    S rP   r$   r$   r   r$   r%   r      s    z#precompute_method.<locals>.<lambda>N)r8   setattr)r   r   r$   r   r%   precompute_method   s   r   methodsc                 C   s   |D ]}t | | qdS )zFReplace methods with new methods that returns a precomputed constants.N)r   )r   r   r   r$   r$   r%   precompute_methods   s   r   c                 C   s   t | |kt | |k  S rP   )rd   )rQ   rR   r$   r$   r%   cmp   rC   r   c                 C   s&   t | dkrt| | d g| S | S )Nr   r   )rU   rI   )r=   sizer$   r$   r%   pad_listlike   s   r   c                    s*   d j  dt  fdd}|S )N___cachec                    s$   t | st|  |  t| S rP   )r7   r   r8   r   r   keyr$   r%   wrapper  s   

zcache_on_self.<locals>.wrapper)__name__r'   wraps)r   r   r$   r   r%   cache_on_self  s   r   c                 C   sJ   ddl m} t| trttjdd | D t S t| |j	r"| j
S t S )Nr   irc                 S   s$   g | ]}t |d r|jr|jjqS )ro   )r7   ro   origins)r<   ro   r$   r$   r%   r>     s    z%aggregate_origins.<locals>.<listcomp>) r   rG   listr'   r?   r@   or_setExternKernelr   )node_scheduler   r$   r$   r%   aggregate_origins  s   
	r   c                 C   s   t | }|dkrdd |D }tt|}nJ|dkrRg }|D ],}|jdkrJd|jv rJt|jd d tr?||jd d  q||jd d j qtt|}n|dkr^d	d |D }nt	|}d

dg| S )Noriginal_atenc                 S   s.   g | ]}|j d krd|jv r|jd jjqS )r   r   )opmeta_overloadpacketr   r<   originr$   r$   r%   r>   $  s
    z)get_fused_kernel_name.<locals>.<listcomp>r.   r   	source_fnr   inductor_nodec                 S   s   g | ]
}|j d kr|jqS r   )r   namer   r$   r$   r%   r>   5  s    r   fused)r   sortedr   r   r   rG   rH   r   r   r   join)r   descriptive_namesall_originssourcesr   r$   r$   r%   get_fused_kernel_name   s,   r   c                 C   s  t | }dd |D }tt}tt}|D ]-}d|jv r.t|jd j}|| |j d|jv rD|jd d d }|| |j q|j	 dd
t|  dd
t|  d	}g }	t| D ]\}
}|	|j	 d
|
 dd
t|  qg|d
|	fS )Nc                 S   s   g | ]	}|j d kr|qS r   )r   r   r$   r$   r%   r>   @      z'get_kernel_metadata.<locals>.<listcomp>r   	from_noder   z Source Nodes: [rc   z], Original ATen: [] z => 
)r   collectionsdefaultdictr   r   rH   r   r   r   commentr   r   keysr   )r   r   r   inductor_nodesfrom_node_dictoriginal_aten_dictro   r   metadatadetailed_metadataoriginal_nodenodesr$   r$   r%   get_kernel_metadata>  s,   



r   initial_queuec                 C   sZ   t | } t| }| r+|  }|jD ]}|r||rq||vr(|| | | q| s
|S )zJReturns the set of nodes whose values depend on those within initial_queue)r   r   popusersaddr   )r   skip_filterdominated_setro   userr$   r$   r%   dominated_nodesX  s   


	r   c                    sb   dd l }ddlm   fddfdd| D }fdd| D }t|jg ||R  S )	Nr   r   r   c                    sD   t |  jr| jS t |  jr| jS t |  jo!t |  jS rP   )rG   	TensorBoxdata
StorageBoxIRNode	Pointwiserk   r   is_unrealized_noder$   r%   r   p  s
   

z*gather_origins.<locals>.is_unrealized_nodec                       g | ]	} |r|j qS r$   r   )r<   valr   r$   r%   r>   w  r   z"gather_origins.<locals>.<listcomp>c                    r   r$   r   )r<   r~   r   r$   r%   r>   x  r   )	itertoolsr   r   r^   r   chain)r)   r*   r   kwarg_originsarg_originsr$   r   r%   gather_originsk  s   r  rp   c                 C   s   t | tjr	| jS t | tjrdtt| jS t | tj	r'dtt| jS t | t
ttfr@| jj ddtt| j dS t| S )z
    Normal sympy str is very slow, this is a lot faster.  The result are
    somewhat worse, as it doesn't do as much simplification.  So don't
    use this for final codegen.
    z + z * (rc   ))rG   rK   Symbolr   Addr   map	sympy_strr)   Mulr   r   r   funcr   rH   )rp   r$   r$   r%   r
  |  s   "r
  r   c                 C   s    | d dksJ t j| dddS )Nr   sT)integernonnegative)rK   r  r   r$   r$   r%   sympy_symbol  s   r  replacementsc                    s$   dd  |   fdd| D S )z=
    xreplace is faster than subs, but is way more picky
    c                 S   s   t | tr	t| S | S rP   )rG   rH   r  )r   r$   r$   r%   promote_strings  s   
z#sympy_subs.<locals>.promote_stringsc                    s   i | ]\}} | |qS r$   r$   )r<   kvr  r$   r%   r]     s    zsympy_subs.<locals>.<dictcomp>)xreplacer   )rp   r  r$   r  r%   
sympy_subs  s   r  rE   prefixc                       t  fdd| jD S )Nc                 3   s    | ]	}|j  V  qd S rP   )r   
startswithr<   r  r  r$   r%   rS     rT   z)free_symbol_startswith.<locals>.<genexpr>anyfree_symbols)rE   r  r$   r  r%   free_symbol_startswith  rC   r!  patternc                    r  )Nc                 3   s    | ]} |j v V  qd S rP   r  r  r"  r$   r%   rS     s    z"free_symbol_has.<locals>.<genexpr>r  )rE   r"  r$   r#  r%   free_symbol_has  rC   r$  c                 C   sD   h d}t  r|h d | jjD ]}t|j|v r dS qdS )N>   aten.multinomial.defaultfbgemm.dense_to_jagged.default%fbgemm.jagged_to_padded_dense.default,aten._fused_moving_avg_obs_fq_helper.default7aten._fused_moving_avg_obs_fq_helper_functional.defaultrun_with_rng_staterun_and_save_rng_state>   aten.scatter.srcaten.scatter_add_aten.scatter.reduceaten.index_put.defaultaten.index_put_.defaultaten.scatter_reduce.twoaten.scatter_add.defaultaten.scatter_reduce_.twoaten.scatter.value_reduceaten.scatter_reduce.two_outaten._unsafe_index_put.defaultTF)r.   $are_deterministic_algorithms_enabledupdaterx   r   rH   r   )r   forbidden_setro   r$   r$   r%   has_incompatible_cudagraph_ops  s   	r:  instance_descriptor)divisible_by_16
equal_to_1ids_of_folded_argsdivisible_by_8)defaultsc              
   #   s   t  u}tjtjd|iR tj|d tjtjd i1 dV  t	| trLt
| dks5J dtj rLt }|  fdd|D  W d   n1 sVw   Y  W d   n1 sew   Y  W d   dS W d   dS 1 s}w   Y  dS )	z
    Contextmanager that provides a clean tmp cachedir for inductor.

    Optionally, pass a dict as 'cache_entries' to get a list of filenames and sizes
    generated with this cache instance.
    TORCHINDUCTOR_CACHE_DIRr1   TRITON_CACHE_DIRNr   z!expected empty cache_entries dictc              	      s,   i | ]}d |vr|t jt j |qS )z.lock)ospathgetsizer   )r<   ftriton_cache_dirr$   r%   r]     s
    z(fresh_inductor_cache.<locals>.<dictcomp>)tempfileTemporaryDirectoryr   patchdictrC  environrD  r   rG   rU   existslistdirr8  )cache_entriesinductor_cache_dirfilesr$   rG  r%   fresh_inductor_cache  s0   




"rS  c                 C   s(   | j }tt| }ttt||ddS )NT)r   reverse)__getitem__r   rU   r   reversedr   )seqgettera_rr$   r$   r%   argsort  s   rZ  ri   c                 C   s   t jd| d S )Nr$   dtype)r.   emptyelement_sizer[  r$   r$   r%   get_dtype_size  s   r_  c                   @   s   e Zd ZU eed< dS )LineContextcontextN)r   
__module____qualname__r   __annotations__r$   r$   r$   r%   r`    s   
 r`  c                   @   sn   e Zd ZdZdddZdd Zdd Zd	d
 Zdd Zdd Z	dd Z
dd Zdd ZdddZdddZdS )IndentedBufferrh   r   c                 C   s   g | _ || _d S rP   )_lines_indent)r   initial_indentr$   r$   r%   __init__  s   
zIndentedBuffer.__init__c                 C   s   t  }d}g }| jD ]8}t|tr| }|d u rq
nt|tr(|||jf q
t|ts/J || |d |d|	d 7 }q
|
 |fS )Nr   r   )r   rf  rG   DeferredLineBaser`  r   ra  rH   writecountgetvalue)r   bufplinemapliner$   r$   r%   getvaluewithlinemap  s"   




z"IndentedBuffer.getvaluewithlinemapc                 C   s   |   \}}|S rP   )rr  )r   r  r   r$   r$   r%   rm    s   zIndentedBuffer.getvaluec                 C   s   t  }| jD ]6}t|tr| }|d u rqnt|trqt|ts#J |dr2||d d  q|| |d q| S )N\r   )	r   rf  rG   rj  r`  rH   endswithrk  rm  )r   rn  rq  r$   r$   r%   getrawvalue  s   




zIndentedBuffer.getrawvaluec                 C   s   | j   d S rP   )rf  clearr   r$   r$   r%   rw  0     zIndentedBuffer.clearc                 C   
   t | jS rP   )boolrf  r   r$   r$   r%   __bool__3     
zIndentedBuffer.__bool__c                 C   s   d| j | j  S )Nr   )rg  tabwidthr   r$   r$   r%   r  6     zIndentedBuffer.prefixc                 C   sr   t |tr| j| d S t |tr| j||   d S | r1| j|   |  d S | jd d S Nr   )rG   r`  rf  r   rj  with_prefixr  stripr   rq  r$   r$   r%   	writeline9  s   

zIndentedBuffer.writelinec                 C   s   |D ]}|  | qd S rP   )r  )r   linesrq  r$   r$   r%   
writelinesC  s   zIndentedBuffer.writelinesr   c                    s   t j fdd}| S )Nc                	   3   s<     j  7  _ zd V  W  j  8  _ d S  j  8  _ w rP   )rg  r$   offsetr   r$   r%   ctxH  s
   "z"IndentedBuffer.indent.<locals>.ctx)
contextlibcontextmanager)r   r  r  r$   r  r%   indentG  s   zIndentedBuffer.indentFc                 C   s   t |trJtd}|jD ]}t |ts"|r"t|t|t|  }qt	|r*d}|jD ]}t |tr;| j
| q-t| |t|d   q-d S t|}|rU| }|sYd S | }|dD ]}| | qbd S )Ninfr   r   )rG   re  floatrf  r`  minrU   lstripmathisinfr   r  rd   textwrapdedentrstripsplit)r   
other_coder  r  rq  r$   r$   r%   spliceR  s,   





zIndentedBuffer.spliceN)r   r   )F)r   rb  rc  r}  ri  rr  rm  rv  rw  r{  r  r  r  r  r  r$   r$   r$   r%   re    s    


re  c                   @   sd   e Zd ZdZdd Zdee fddZdedd fdd	Zd
d Z	dd Z
dd Zdd Zdd ZdS )rj  z.A line that can be 'unwritten' at a later timec                 C   s   |  sd}|| _d S r  )r  rq  r  r$   r$   r%   ri  m  s   
zDeferredLineBase.__init__r,   c                 C      t  )zJReturns either self.line or None to indicate the line has been 'unwritten'r   r   r$   r$   r%   __call__r     zDeferredLineBase.__call__rq  c                 C   r  )z3Returns a new deferred line with the same conditionr  r  r$   r$   r%   	_new_linev  r  zDeferredLineBase._new_linec                 C   s   |  | | j S rP   r  rq  )r   r  r$   r$   r%   r  z     zDeferredLineBase.with_prefixc                 C   s   |  | j S rP   )r  rq  r  r   r$   r$   r%   r  }  r~  zDeferredLineBase.lstripc                 C   s   |  | j| S rP   r  )r   rE   r$   r$   r%   rU    r~  zDeferredLineBase.__getitem__c                 C   ry  rP   )rz  rq  r   r$   r$   r%   r{    r|  zDeferredLineBase.__bool__c                 C   ry  rP   )rU   rq  r   r$   r$   r%   __len__  r|  zDeferredLineBase.__len__N)r   rb  rc  __doc__ri  r	   rH   r  r  r  r  rU  r{  r  r$   r$   r$   r%   rj  j  s    rj  c                 C   s(   t j| j}|dk rtd dS dS )NP   z,not enough SMs to use max_autotune_gemm modeFT)r.   r/   get_device_propertiesmulti_processor_countlogwarning)rE   smsr$   r$   r%   
is_big_gpu  s
   
r  F)enable_int32c                C   st   t jt jt jg}|rt jt jt jt jg}tjptjptjo9dtj	
 dv o9| jjdko9| j|v o9t| jjp8dS )NTRITON,r/   r   )r.   float16bfloat16float32int32r   max_autotunemax_autotune_gemmsearch_autotune_cachemax_autotune_gemm_backendsupperr  rD   rI   r\  r  rE   )layoutr  layout_dtypesr$   r$   r%   use_triton_template  s    
r  c                   C   s   dt j dv S )NATENr  )r   r  r  r  r$   r$   r$   r%   use_aten_gemm_kernels  r  r  c                   @   s.   e Zd ZedZdd Zdd Zdd ZdS )	DebugDirManagerr   c                 C   s   t tj| _d | _d S rP   )nextr  counterr\   prev_debug_namer   r$   r$   r%   ri    s   
zDebugDirManager.__init__c                 C   s0   t jjj| _| j d| j | _| jt jj_d S )N_tmp_)r.   _dynamor   debug_dir_rootr  r\   new_namer   r$   r$   r%   	__enter__  s   zDebugDirManager.__enter__c                 G   s   t | j | jtjj_d S rP   )shutilrmtreer  r  r.   r  r   r  )r   r)   r$   r$   r%   __exit__  s   zDebugDirManager.__exit__N)	r   rb  rc  r   rl  r  ri  r  r  r$   r$   r$   r%   r    s
    
r  c                    sz   ddl m} |j g  fdd}tj|d| tj  | |i |}W d    |fS 1 s4w   Y  |fS )Nr   )GraphLoweringc                    sF    | }t |j}|  W d    |S 1 sw   Y  |S rP   )open__file__r   read)r   modrF  compile_to_modulesource_codesr$   r%   patched_compile_to_module  s   
z3run_and_get_code.<locals>.patched_compile_to_moduler  )	rx   r  r  r   rK  objectr.   r  reset)r   r)   r*   r  r  r   r$   r  r%   run_and_get_code  s   

r  c                 O   sN   t | g|R i |\}}dt|  krdks#n J dt| |d S )Nr   rg   z%expected one or two code outputs got r   )r  rU   )r   r)   r*   r   r  r$   r$   r%   run_and_get_triton_code  s
   r  c              	   c   sN    ddl m} |j|  }zt|||j| < dV  W ||j| < dS ||j| < w )z~
    Override the lowering of aten_op with overide_fn.
    The first argument of override_fn is the original lowering fn.
    r   )loweringN)torch._inductorr  	loweringsr'   partial)aten_opoverride_fnr  orig_fnr$   r$   r%   override_lowering  s   
r  c                    s4   ddl m} |j  fdd}tjj|d|S )zr
    Add hook functions to be called at the beginning and end of Scheduler.__init__.
    Used for unit tests.
    r   )	Schedulerc                    s&   | |  | |}r| | |S rP   r$   )	schedulerr   outr  post_fnpre_fnr$   r%   r     s
   


z(add_scheduler_init_hook.<locals>.wrapperri  )torch._inductor.schedulerr  ri  unittestr   rK  r  )r  r  r  r   r$   r  r%   add_scheduler_init_hook  s   r  c                 C   s"   t jr
t|  dS t|  dS )z
    Warnings that will be actionable for PyTorch developers, but not
    end users.  Allows us to easily disable them in stable releases but
    keep them on for nightly builds.
    N)r   developer_warningsr  r  infomsgr$   r$   r%   developer_warning  s   r  num_in_out_argsr)   r  c                    s   t  fddt|D S )z
    Return the total number of bytes the arguments of tensor type takes.

    For in/out args, tensor sizes are counted twice: once for reading and
    once for writing.

    The first num_in_out_args arguments are in out tensors.
    c                 3   s@    | ]\}}t |tjr| |  d t| k   V  qdS r   N)rG   r.   r   numelr^  rd   )r<   rr   r~   r  r$   r%   rS     s    

z get_num_bytes.<locals>.<genexpr>)rW   r   )r  r)   r$   r  r%   get_num_bytes  s   	r  r   c                 C   s   | | dd|dd|dd| }zdd l }| dkr,|dk r/|jj| |jj }W |S W |S W |S  ty@   td	 Y |S w )
Nz.3fzms    	z GB 	 z7.2fzGB/sr   g~jt?i  z@Colorama is not installed. Install it if you want colored output)coloramaForeREDRESETr   r  r  )msnum_gbgb_per_sr  suffixinfo_strr  r$   r$   r%   create_bandwidth_info_str  s   $r  c                  C   s   z/t jd} | d tt jk r.tt j| d  dkr.t j| d  d dkr.t j| d  W S W n	 ty8   Y nw t jD ]}|drM|tdd   S q<dS )a  
    An experimental API used only when config.benchmark_kernel is true.

    The benchmark name is only available at codegen time. So we can not
    directly call it in benchmark_all_kernels which is run after codegen.

    The function assumes the argument after --only is the benchmark name.
    It works for torchbench.py/hugginface.py/timm_models.py. But for ad-hoc
    scripts, this function may return None.

    There are 2 flavors of --only argument we need handle:
    1. --only model_name
    2. --only=model_name
    z--onlyr   r   -z--only=N)sysargvrE   rU   
ValueErrorr  )idxr~   r$   r$   r%   get_benchmark_name"  s   

r  c                 C      t dd | D S )Nc                 s       | ]}|d kV  qdS r  r$   r;   r$   r$   r%   rS   B      zis_ones.<locals>.<genexpr>r   r   r$   r$   r%   is_onesA  r`   r  c                 C   r  )Nc                 s   r  )r   Nr$   r;   r$   r$   r%   rS   F  r   zis_zeros.<locals>.<genexpr>r  r  r$   r$   r%   is_zerosE  r`   r  c                 C   r  )Nc                 s   s,    | ]}t |tjr|jtd kV  qdS )cpuN)rG   r.   r   rD   )r<   itemr$   r$   r%   rS   J  s    

z is_cpu_device.<locals>.<genexpr>r  )inputsr$   r$   r%   is_cpu_deviceI  s   r  r   c                 C   s&   t | tjs
J d| jrtjS tjS )Nz8only support sympy.Expr as input to get_sympy_Expr_dtype)rG   rK   Expr
is_integerr.   int64float64)r   r$   r$   r%   get_sympy_Expr_dtypeQ  s   r  c                 o   sN    | r"t jj|i |}|V  W d    d S 1 sw   Y  d S d V  d S rP   )r.   profilerprofile)should_profiler)   r*   ro  r$   r$   r%   maybe_profile[  s   "
r  c                 C   s6   t | j }|d| jf |d| jf t|S )z~
    Convert triton config to a tuple that can uniquely identify it. We can use
    the return value as a dictionary key.
    	num_warps
num_stages)r   r*   r   r   r  r  r   )cfgr   r$   r$   r%   triton_config_to_hashabled  s   r  Tc                 C   s$   t s| S ttj| |  tjj S rP   )HAS_COLORAMAr8   r  r  r  r  )r  colorr$   r$   r%   _color_textv  s   r  c                 C   
   t | dS )Ngreenr  r  r$   r$   r%   
green_text}  r|  r  c                 C   r  )Nyellowr  r  r$   r$   r%   yellow_text  r|  r  c                 C   r  )Nredr  r  r$   r$   r%   red_text  r|  r   c                 C   r  )Nbluer  r  r$   r$   r%   	blue_text  r|  r"  c               
   C   s.   ddl m}  tjdtjdtdtd| jdi}|S )Nr   r   rd   Devicerz  r  r   )r   r   r.   r\  rD   rz  r  r   )r   PYTHON_TYPE_TO_SCHEMA_TYPEr$   r$   r%   python_type_to_schema_type  s   r%  c                 C   s   |rd|  dS | S )Nz	Optional[r   r$   )schema_typeis_optional_argr$   r$   r%   may_get_optional_schema_type  r  r(  c                 C   sh   t | trtdd | D rtd|}|t|kS dS | jt v r2t | j }t||}|t|kS dS )Nc                 s   s,    | ]}t |tpt |tjo|jV  qd S rP   )rG   rd   rK   r  r
  r;   r$   r$   r%   rS     s
    
ztype_match.<locals>.<genexpr>z	List[int]F)rG   r   r   r(  rH   	__class__r%  )r~   arg_typer'  may_optional_schema_typer&  r$   r$   r%   
type_match  s    
r,  c                    sH  d}d}| j D ]}| s|d7 }|js|d7 }qt|}t|}d}dd }	dd   fdd}
t||ksAJ |	t|||| j D ]W}d }d	}||k rY|jrT d	S || }n|rg|j|v rg||j }d
}|d u rr|
|sr d	S |d ur|j}t|| |s d	S |s|d7 }qD|d u r |s|d ur|d8 }qD|dkrd	S d
S )Nr   r   c                 S   s2   ||krd| d| d|  dS d| d|  dS )Nztakes from z to z positional arguments but z were givenztakes r$   )nargsmax_pos_argsmin_argsr$   r$   r%   args_error_message  s   z(schema_match.<locals>.args_error_messagec                 S   s   dt | jv S )Nr	   )rH   rI   r~   r$   r$   r%   is_optional  rx  z!schema_match.<locals>.is_optionalc                    s    | p|   S rP   )has_default_valuer1  r2  r$   r%   
allow_none  r~  z schema_match.<locals>.allow_noneFT)	argumentsr3  
kwarg_onlyrU   r   rI   r,  )schemar)   r*   r/  r.  argumentr-  remaining_kwargsarg_posr0  r5  r   is_kwdexpected_typer$   r4  r%   schema_match  sT   






r>  c                 C   s"   | D ]}t |||r|  S qd S rP   )r>  )schemasr)   r*   r8  r$   r$   r%   try_find_schema  s
   r@  c                 C   s`   ddl m}m} | tjtjtjfv sJ | tjtjfv r || S tjjj	j
r+|tjS |tjS )Nr   )get_max_simd_tflopsget_max_tensorcore_tflops)r   rA  rB  r.   r  r  r  backendsr/   matmul
allow_tf32)r\  rA  rB  r$   r$   r%   get_device_tflops  s   

rF  c                  C   s   ddl m}  |  S )Nr   get_dram_gbps)r   rH  rG  r$   r$   r%   get_gpu_dram_gbps
  s   rI  c                 C   s
   |  dS )Nwelford)r  reduction_typer$   r$   r%   is_welford_reduction  r|  rM  c                 C   s   t | rdS dS )N   r   )rM  rK  r$   r$   r%   reduction_num_outputs  r~  rO  r  )r$   r   r   r   rP   )r   r   )r   r  r'   r   r   loggingr  r@   rC  r  r  rI  r  r   r  ior   typingr   r   r   r   r   r   r	   r
   r   r   r   r   rK   r.   torch.fx.immutable_collectionsr   r   torch.utils._sympy.functionsr   r   r   r   r   cuda_propertiesr   r   	getLoggerr   r  r   r	  	VarRangesr   r(   rz  r2   r:   rB   rD   rH   rJ   rO   r[   r_   rd   re   rl   rn   rt   r}   r   r   r  r   r   __hash__r   r   r   r   r   r   r   r   r   Noder   r  r
  r  r  r  r!  r$  r:  
namedtupler   r;  r  rS  rZ  r_  r`  re  rj  r  r  r  r  r  r  r  r  r  r   r  r  r  r  r  r  r\  r  r  r  r  r  r   r  r  r  r   r"  r%  r(  r,  r>  r@  rF  rI  rM  rO  r$   r$   r$   r%   <module>   s    4
"

&
 





"	 
i 
	





<