o
    hW                    @   s&  d dl Z d dlZd dlZd dlZd dlZd dlmZ d dlmZ d dl	m
Z
mZmZmZmZ d dlZd dlZd dlZd dlm  mZ d dlmZmZmZmZmZmZmZmZmZm Z m!Z! d dl"m#Z#m$Z$ d dlm%Z% d dl&m'Z'm(Z(m)Z) d	d
l*m+Z+ ddl,m-Z-m.Z.m/Z/m0Z0 ddl1m2Z2m3Z3 ddl/m4Z4m5Z5m6Z6m7Z7m8Z8m9Z9m:Z:m;Z;m<Z<m=Z=m>Z> ddlm?Z?m@Z@mAZAmBZB ddlCmDZDmEZE eFeGZHi ZIi ZJeK ZLejDjMZMejDjNZNejDjOZOeK ZPeK ZQdd ZRdd ZSdd ZTeSeMjUeMjVeMjWeMjXeMjYeMjZeMj[eMj\eMj]eMj^eMj_eMj`g ejaejbejcejdejeejfejgejhejiejjejiejkejldZmdenfddZodd Zpdd Zqdefd d!Zrd"d# Zsd$d% Ztd&d' Zud(d) Zvd*ejwd*fd+d,Zxd-d. Zydd/d0Zz					*dd1d2Z{dd3d4Z|dd5e<dej}fd6d7Z~exeOjdd8d5e<dej}fd9d:Zd*d;d5e<dej}fd<d=ZexeMjj}dd8d5e<dej}fd>d?Zd*d;d5e<d@ejfdAdBZexeOjdd8d5e<d@ejfdCdDZddEejwd*ddd*d*fdFdGZ	*ddHdIZexeMjd*ddJdKdL ZexeMjd*ddJdMdN ZexeMjeMjeMjeMjeOjgdOdP ZeeMdQrexeMje exeMjdd8ddRdSZexeMjdd8ddTdUZexeMjgddVdWZexeMjdXdY ZexeMjdZd[ ZexeMjd\d] ZexeMjd^d_ ZexeMjd`da ZexeMjdbdc ZexeMjdd8ddde ZexeOjdd8dfdg ZexeMjdd8dhdi ZexeMjdjdk ZexeMjdd8exeMjdd8exeMjdd8dldm ZexeMjdd8dndo ZexeMjdd8ddqdrZexeMjdd8e fdsdtZexeMjUdd8ddudvZUexeMjdd8ddwdxZexeMjdd8ddydzZexeMjdd{d|ZexeMjdd8dd}end~endenfddZexeMjdd8dd}end~endenfddZexeMjdd8dd}end~endenfddZexeMjdd8dd ZexeMjdd8dddZexeMjdd8dddZexeMjdd8dddZexeMjdd8dd ZexeMjdd8dd ZexeMjdd8dd ZdddZexeMjdddZdd Ze  dddZe ddd ZdejjfddZdejjfddZddejjfddZdddZdd ZexejDjjdd8dd ZexeMjdd8dd ZexeMjdd8dd ZexeMjjdd8dd ZexeMj˃dd Ze ddd Zdd ZeeMj΃ZeeMjЃZeeMj҃ exeMj΃dd ZexeMjЃdd Zexe.jdd8dd Zexe.jdd8d@ejfddZexe.jdd8dd Zexe.jdd8ddĄ Zexe.jdd8d dŜdeen de<ded}enfddʄZexe.jdd8d dŜdendendeen de<d}enf
dd΄ZexeMjdd8d*d*dϜde<de<dekdekfddՄZddׄ Zddل Zddۄ Zdd݄ ZdhZeeMje eeMjZe eeMje eeMje eeMjed*dߍ eeMjed*dߍ eeMje eeMje eeMj eeMj eeMj eeMj eeMje eeMj eeMj eeMj eeMj eeMj eeMj eeMjj eeMj eeMje eeMj eeMje eeMje eeMj eeMj eeMj eeMj  eeMj eeMj eeMj eeMj eeMjd*dߍ eeMjd*dߍ eeMj eeMj eeMj	 eeMj
 eeMj eeMjd*dߍ eeMjd*dߍ eeMj eeMj eeMj eeMj eeMj eeMj eeMj eeMj eeMj eeMj eeMjd*dߍ eeMjd*dߍ eeMj eeMj eeMj eeMj eeMj eeMj eeMj eeMj eeMj eeMj  eeMj! eeMj" eeMj# eeMj$ eeMj% eeMj& eeMj' eeMj( eeMj) eeMj* eeMj+ eeMj, eeMj- eeMj. eeMj/ eeMj0 eeMj1 eeMj2 eeMj3 eeMj4 eeMj5 eeMj6 eeMj7 eeMj8 eeMj9 eeMj: eeMj; eeMj< eeMj= eeMj> eeMj?d*dߍ eeMj@d*dߍ eeMjAd*dߍ eeMjB eeMjC eeMjD eeMjEd*dߍ eeMjF eeMjG eeMjHd*dߍ eeMjId*dߍ eeMjJd*dߍ eeMjK eeMjL eeMjM eeMjN eeMjO eeMjPd*dߍ eeMjQ eeMjR eeMjSd*dߍ eeMjTd*dߍ eeMjU eeMjV eeMjWd*dߍ eeMjXd*dߍ eeMjY eeMjZ eeMj[ eeMj\ eeMj] eeMj^ eeMj_ eeMj` eeMja eeMjb eeMjc eeMjd eeMje eeMjf eeMjgd*dߍ eeMjhji eeMjjjk eeMjljk eeMjm eeMjn eeMjo eeMjp eeMjqjr eeMjsjk eeMjtjk eeMju eeMjv eeMjw eeMjx eeMjy eeMjzjkd*dߍ eeMj{ eeMj|j} eej~jj eej~jj eeMjjkd*dߍ exeMjdd8dddZexeMjddddZeeMdrexeMje exeOjdd ZexeMjdd8dendenfddZexeMjdd8dddZdd ZexejeMjgdddd*dddZexejdddZexejdd ZexeMjdd Zdd ZexeMjdd8dd Zdd ZexejeMjgdddddddd dZdd Zdd ZexeMjeeZeedZeed Zdd ZexeMjdddddd	d
ZexeMjdddddddZexeMjdddddddZexeOjjkdd ZexejeMjgdd ZexeMjdd8dddZexeMjdd8dddZdd Zdd ZexeMjdd8dd ZexeMjdd8dd ZexeMjdddZexeMjdd d!Zd"d# Zd$d% ZexeMjdd8dd&d'Zd(d) ZexeMjdd8dd*d+ZexeMjdd8denfd,d-ZddEd.dend/ee d0ekfd1d2ZexeMjdd8dd3dend/ee fd4d5ZexeMjdd8denfd6d7ZexeMjdd8denfd8d9ZexeMjdd8denfd:d;ZexeMjdd8dEd<dend0ekfd=d>Z		dd?eee d@f dAenfdBdCZexeMjjkddDee fdEdFZexeMj^jk	ddGee dHee fdIdJZ^exeMjjk			ddKee dGee dHee fdLdMZdNdO ZexeMj_jk		ddPekdGee dHee fdQdRZexeMjdSdT ZexeMjdUdV ZexeOjjkdWdX ZexeMjdd8ddYdZZd[ejdeejenf fd\d]Zd[ejdejfd^d_Zd[ejdejdejfd`daZddcddZdedf ZeeMj[ZexeMj[dd8	*ddgdhZ[eeMj\ZexeMj\dd8didj Z\dkdl ZŐdmdn ZeeMjǃZexeMjǃdodp ZexeMjɐjk	ddqdrZeeMjVZexeMjVdd8	s	 	*	E	ddtduZVeeMjWZexeMjWdd8	ddvdwZWdxdy Z̐dzd{ Z͐dd|efd}d~ZexeMjσdddddZϐdd ZАdd Zѐdd ZexeMjeOjgddd*dddZexeMjՃddd*dddZՐdd Ze{dd ZeeMj؃ZexeMjdEddd Zؐdd ZexeMjۃdd ZexeMjdd8dddZe{dd Ze{dd ZexeMjdEddddZexeMjgdEddd ZexeOjgdEddd ZexeMjeMjߐjgdEejdJeZexeMjeOjgdEddd ZexeMjdd ZexeMjeOjgdddddZexeMjdddddZexeMjdddZexeMjdddZexeMjdddZexeOjeΐd exeMjeΐdZexeMjeΐdZexeMjeΐdejedZexeMjeΐdejedZeeMjdEddZdd Zdd ZeeMjZeeMjZeeMjZeeMj Z eeMjZeeMjZeeMjZeeMjdEdZeeMj eeMj eeMj eeMjZeeMj	Z	eeMj
ddZ
eeMjZeeMjZeeMjZeeMj eeMjZexeMjejd8e eeMj eeMj eeMj eeMjH eeMjddEejkdZeeMjddEejkdZeeMjddEejkdZeeMjddEejkdZeeMjZeeMjZexeMje exeMje eeMjZeeMjZeeMj eeMjddZeeMj eeMj ejkd eeMj!ejkd eeMj"ejkd eeMj#ejkd eeMj$ejkdZ$eeMj%ejkd eeMj&ejkd eeMj' eeMj( eeMj) eeMj* eeMj+ eeMj, eeMj- eeMj. eeMj/ eeMj0 eeMj1 eeMj2 eeMj3 eeMj4 eeMj5 eeMj6jedEd eeMj6j7edEd eeMj8je eeMj8j7e eeMj9je eeMj9j7e eeMj:jke eeMj;j7e؃ eeMj;j<e؃ eeMj=je߃ eeMj=j7e߃ eeMj>e eeMj?je eeMj?j7e eeMj@e eeMjAe eeMjBe dÐdĄ ZCeCeMjDe eCeMjEe eCeMjFe	 eCeMjGe
 eCeMjHe eCeMjIe eCeMjJe eCeMjKe eCeMjLje߃ eCeMjLjMe eCeMjNe eCeMjOe eCeMjPe eCeMjQe eCeMjRe eCeMjSe  eCeMjTe exeMjUe exeMjVe	 exeMjWe exeMjXe exeMjYe eCeMjZeMjU eCeMj[eMjV eCeMj\eMjW eCeMj]eMjX eCeMj^eMjY exeMj_dŐdƄ Z_exeMj`dǐdȄ Z`exeMjadɐdʄ ZaexeMjbdːd̄ Zbe#c D ]\ZdZeexe$edee qexeMj˃d͐d΄ ZfexejDjgjhdϐdЄ Zizod dljZejDjkZkexekjldѐd҄ ZmexekjndӐdԄ ZoexekjpdՐdք Zpexekjqdאd؄ Zqexekjrdِdڄ Zrexekjsdېd܄ Zsexekjtdݐdބ ZtW n euyT   eHvdߡ Y nw ddl,mwZw e+ew ddl,mxZx exy  dS (      N)defaultdict)Iterable)AnyListOptionalTupleUnion)canonicalize_dimcanonicalize_dimscheckdtype_to_typeelementwise_dtypesELEMENTWISE_TYPE_PROMOTION_KINDis_boolean_dtypeis_float_dtypeis_integer_dtypeNumbertype_to_dtype)magic_methodsmethod_to_operator)tree_flatten)CeilDivFloorDivModularIndexing   )import_submodule   )configinductor_primsirtest_operators)decompositionsget_decompositions)
ExpandViewIndexingConstant	is_tritonops_wrapperPermuteView	Pointwise	ReductionSqueezeView	TensorBoxvalidate_irView)ceildivdecode_devicepad_listlikesympy_product)opsVc                 C   s   | s	t d| d S )Nzinductor does not support NotImplementedError)condmsg r8   N/var/www/html/ai/venv/lib/python3.10/site-packages/torch/_inductor/lowering.py
assert_nyi>   s   r:   c                 C   s\   t | tttfrdd | D S t|  t | tjjr*| 	 D ]}tt
| | qd S d S )Nc                 S   s   g | ]}t |qS r8   )add_needs_realized_inputs.0xr8   r8   r9   
<listcomp>E       z-add_needs_realized_inputs.<locals>.<listcomp>)
isinstancelisttuplesetneeds_realized_inputsaddtorch_opsOpOverloadPacket	overloadsgetattr)fnoverloadr8   r8   r9   r;   C   s   
r;   c                 C   s:   t | tjjr|  D ]	}|tt| |< qd S |t| < d S N)rA   rG   rH   rI   rJ   layout_constraintsrK   )rL   
constraintrM   r8   r8   r9   add_layout_constraintL   s
   rQ   )r   r   r                     	   
         dtypec                 C   s2   t | ts| S | tv sJ d|  dt|  } | S )Nzid z missing from DTYPE_ID_LOOKUP)rA   intDTYPE_ID_LOOKUPr\   r8   r8   r9   decode_dtype~   s
   
r`   c                 C   sB   t | trt|  pt|  S t | tjr| jdu S t | tS NT)	rA   r+   r   	get_dtyper   sympySymbol
is_integerr]   r>   r8   r8   r9   is_integer_type   s
   


rg   c                 C   s    t | trt|  S t | tS rN   )rA   r+   r   rb   boolrf   r8   r8   r9   is_boolean_type   s   

ri   type_promotion_kindc                    s0   dd   fdd|D }t |d| i\}}|S )Nc                 S   sF   t | ttjfr
| S t| dsJ t|  }tjdg| | 	 dS )Nrb   r   r_   )
rA   r   rc   rd   hasattrlenget_sizerG   zerosrb   )inpdimr8   r8   r9   construct_input   s
   z+get_promoted_dtype.<locals>.construct_inputc                       g | ]} |qS r8   r8   )r=   argrq   r8   r9   r?      r@   z&get_promoted_dtype.<locals>.<listcomp>rj   )r   )rj   argsinps_r\   r8   rt   r9   get_promoted_dtype   s   	rx   c                 C   sh   t | ttfs| g} nt| } t| D ]}t |tjjr1| D ]}t||}|tvr0| 	| q q| S rN   )
rA   rB   rC   rG   rH   rI   rJ   rK   	loweringsappend)aten_fnrL   rM   other_fnr8   r8   r9   get_overloads   s   

r}   c                    s   dd t  D |s|r4r4|rtjndd  D }t|d|i fddfdd D  |rrrrtt fddD  D ]\}}| |< qFtt D ]}t | t	j
rqt | t d	    |< qU S )
Nc                 S      g | ]\}}t |tr|qS r8   rA   r+   r=   ir>   r8   r8   r9   r?          z"transform_args.<locals>.<listcomp>c                 S   s$   g | ]}t |tst|d r|qS rb   )rA   r   rk   r=   ar8   r8   r9   r?      s    
rj   c                    s@   t | tr
t| S t | tjrt| j d   S | S Nr   )rA   r+   to_dtyper   Constantvalue
get_device)rs   )ru   r\   indicesr8   r9   promote   s
   

ztransform_args.<locals>.promotec                    rr   r8   r8   r   )r   r8   r9   r?      r@   c                       g | ]} | qS r8   r8   r=   r   ru   r8   r9   r?      r@   r   )	enumeraterG   rh   rx   zipbroadcast_tensorsrangerl   rA   r   r   r#   createrB   rm   )ru   	broadcastrj   convert_input_to_boolpromoting_argsr   r>   r8   )ru   r\   r   r   r9   transform_args   s,   $
$r   c                    sD   t   fddt| }t| tfdd|D  S )a  
    Add a foreach lowering to lowerings dict.

    Arguments:
        aten_fn: torch.ops.aten.* fn we are lowering
        decomp_fn: alternate implementation on our IR
        broadcast: True to apply broadcasting to tensor inputs
        type_promotion_kind: kind of type promotion applied to tensor inputs, `None` means no type promotion
        convert_input_to_bool: some logical ops require inputs are converted to bool
    c                     s*   t | dksJ  | i |}t| |S )Nr   )rl   r,   )ru   kwargsout)	decomp_fnr8   r9   wrapped   s   z+_register_foreach_lowering.<locals>.wrappedc                       i | ]}| qS r8   r8   r=   rL   r   r8   r9   
<dictcomp>       z._register_foreach_lowering.<locals>.<dictcomp>)	functoolswrapsr}   foreach_opsupdatery   )r{   r   aten_fnsr8   )r   r   r9   _register_foreach_lowering   s   
r   c                    sB   t  fddt  tfdd D  S )a  
    Add a lowering to lowerings dict

    Arguments:
        aten_fn: torch.ops.aten.* fn we are lowering
        decomp_fn: alternate implementation on our IR
        broadcast: True to apply broadcasting to tensor inputs
        type_promotion_kind: kind of type promotion applied to tensor inputs, `None` means no type promotion
        convert_input_to_bool: some logical ops require inputs are converted to bool
    c                     s   t | } d}t| dkrt| d t tfrd}| d } tdd | D r*J dtdd | D r@td	d  D s@J t| } |rL| g} | i |}t	| |S )
NFr   r   Tc                 s       | ]}|d kV  qdS )r   Nr8   r<   r8   r8   r9   	<genexpr>      
z6_register_lowering.<locals>.wrapped.<locals>.<genexpr>zout= ops aren't yet supportedc                 s   s    | ]}t |tV  qd S rN   r   r<   r8   r8   r9   r         c                 s   s    | ]}|t v V  qd S rN   )	fallbacksr   r8   r8   r9   r     r   )
rB   rl   rA   rC   anykeysvaluesallr   r,   )ru   r   unpackedr   )r{   r   r   r   rj   r8   r9   r      s*   
z#_register_lowering.<locals>.wrappedc                    r   r8   r8   r   r   r8   r9   r     r   z&_register_lowering.<locals>.<dictcomp>)r   r   r}   ry   r   )r{   r   r   rj   r   r8   )r{   r   r   r   rj   r   r9   _register_lowering   s
   r   Fc                 C   s   t jt| |||dS )z+
    Shim to support decorator syntax.
    r   rj   r   )r   partialr   )r{   r   rj   r   r8   r8   r9   register_lowering"  s   	r   c                 C   s   g }t jt| t|tddD ];\} }|dkr||  q| dkr)|| qtjj	| | t
t|jt
t| jk rG|| q||  qtt|S )z
    Broadcasting logic based on symbolic shapes.

    We give the shapes 0 and 1 concrete values, while all other shapes
    are symbolic sympy formulas.
    r   )	fillvalue)	itertoolszip_longestreversedrc   Integerrz   r3   graphsizevarsguard_equalsrl   expandfree_symbolsrC   )r   boutputr8   r8   r9   broadcast_symbolic_shapes4  s    r   c                    s   t dd | D s| S tdd | D r-|pt| dtjifdd  fdd| D S td	d | D }g }| D ]9}t|ttfrZ|	t
t|| | t|  q:t|tjrn|	t|| |  q:|	| q:|S )
Nc                 s   s"    | ]}t |tjttfV  qd S rN   )rA   rc   Exprr]   floatr<   r8   r8   r9   r   M       z$promote_constants.<locals>.<genexpr>c                 s   s"    | ]}t |tttjfV  qd S rN   )rA   r]   r   rc   rd   r<   r8   r8   r9   r   O  r   rj   c                    s0   t | tjrt|  td S t|  td S rN   )rA   rc   rd   r   r$   r/   r   rf   r_   r8   r9   
const_funcT  s   z%promote_constants.<locals>.const_funcc                    rr   r8   r8   r<   )r   r8   r9   r?   Z  r@   z%promote_constants.<locals>.<listcomp>c                 s   s"    | ]}t |ttfr|V  qd S rN   )rA   r+   r#   r<   r8   r8   r9   r   [  r   )r   r   rx   r   DEFAULTnextrA   r]   r   rz   r#   r   r   r   rb   r   rB   rm   rc   r   r$   )inputsoverride_return_dtypeexr   r>   r8   )r   r\   r9   promote_constantsL  s.   r   c                    s*   d ddt t f fdd}|S )Nalphar   c              	      sD  t |	}r| d ur| dkrt|}t|d | |d< n| d u s#J dd |D |d  	p7|d   t|d  jdk|dd  D ]!}t|t	j
sjtt| ksjJ d d d|  qI fd	d
}sd }|D ]}| jdkr| } nq}|s|d  }p|}tj| |dS )Nr   c                 S      g | ]}|  qS r8   make_loaderr<   r8   r8   r9   r?   |  r@   z1make_pointwise.<locals>.inner.<locals>.<listcomp>r   cudazndim mismatch  c                    s   t  t ksJ d  d tjkr&d ur& fddD  S r:r:tjkr: fddD  S  fddD  S )Nzwrong ndim r   c                       g | ]}| qS r8   r8   r=   loadindexr8   r9   r?     r@   zCmake_pointwise.<locals>.inner.<locals>.inner_fn.<locals>.<listcomp>c                    r   r8   r8   r   r   r8   r9   r?     r@   c                    r   r8   r8   r   r   r8   r9   r?     r@   )rl   rG   rh   float64r   )r\   rL   is_cudaloadersoverride_fn_when_cuda_float64override_fn_when_input_boolrangesr   r9   inner_fn  s   $z/make_pointwise.<locals>.inner.<locals>.inner_fndevicer\   r   r   )r   rB   mulrm   rb   r/   r   typerA   r   BaseConstantrl   r(   r   )r   r   otherr   r   r   allow_alpharL   override_devicer   r   r   )r\   r   r   r   r9   innert  sF   
	zmake_pointwise.<locals>.innerr   r+   )rL   r   r   r   r   r   r   r8   r   r9   make_pointwisel  s   &-r   c                    s&   dddt t t  f fdd}|S )Nr   r   r   c                    sl  dd   fdd}d}t jjjD ]}|jD ]}|jdkr"|jtv s$d}qqd }|D ]}t|tt	fr7|} nq*|d us@J dg }|D ]}t|tt	fsX|
|gt|  qD|
| qD|t| }	d gt| }
|	 D ]9\\}}}g }|D ]&\}}r|d	| i}n| }||
|< |jd
kr|r|r|
|  qy|rt j| qotdd |
D sJ |
S )Nc                  W   s   t dd | D S )Nc                 s   s2    | ]}t |totd d |j D V  qdS )c                 s   s    | ]}|j V  qd S rN   )r   r<   r8   r8   r9   r     s    zVmake_foreach_pointwise.<locals>.inner.<locals>.is_dynamic.<locals>.<genexpr>.<genexpr>N)rA   r+   r   datarm   )r=   tr8   r8   r9   r     s    

zLmake_foreach_pointwise.<locals>.inner.<locals>.is_dynamic.<locals>.<genexpr>)r   r   r8   r8   r9   
is_dynamic  s   z9make_foreach_pointwise.<locals>.inner.<locals>.is_dynamicc                    st   t t}t| D ]/\}} |  }d }|D ]}t|tr#|j } nq|d us,J d|||f ||f q|S )Nz.foreach op should have at least one tensor arg)r   rB   r   rA   r+   r   r   rz   )	arg_pairsr   r   ru   use_foreachr   r   r   r8   r9   
group_args  s   



z9make_foreach_pointwise.<locals>.inner.<locals>.group_argsFcall_functionTz1at least one input must be a list to a foreach opr   r   c                 s   s    | ]}|d uV  qd S rN   r8   r<   r8   r8   r9   r         z8make_foreach_pointwise.<locals>.inner.<locals>.<genexpr>)r3   r   current_nodeusersoptargetr   rA   rB   rC   rz   rl   r   itemsr   realizeregister_listr   )r   r   r   realize_outputsnodeusera_list_inputinputbroadcast_inputsgroupsoutputsr   r   groupbuffer_list
output_indru   r   r   pw_fnr   r9   r     sV   	

z%make_foreach_pointwise.<locals>.innerr   )r	  r   r   r8   r  r9   make_foreach_pointwise  s   "Ir
  r>   c                    s8   |    kr|rt| S | S  fdd}t| d| S )Nc                       t |  S rN   )r2   r   rf   r_   r8   r9   	_to_dtype     zto_dtype.<locals>._to_dtyper   )rb   cloner   )r>   r\   copyr  r8   r_   r9   r     s   r   rj   c                 C      t | |ddS NTr  r   r>   r\   r8   r8   r9   _convert_element_type     r  r  c                   sv   |    kr|rt| S | S dd }||   }| }||kr-td|    d  d fdd}t| d| S )	Nc                 S   s   | j r	t| jS t| jS rN   )is_floating_pointrG   finfobitsiinfor_   r8   r8   r9   _get_primitive_bitwidth  s   z1to_dtype_bitcast.<locals>._get_primitive_bitwidthzbitcast z to different bitwidth type z is not supported yet.c                    r  rN   )r2   to_dtype_bitcastrf   r_   r8   r9   _to_dtype_bitcast  r  z+to_dtype_bitcast.<locals>._to_dtype_bitcastr  )rb   r  r5   r   )r>   r\   r  r  src_bitsdst_bitsr  r8   r_   r9   r     s   r  c                 C   r  r  )r  r  r8   r8   r9   _view_dtype  r  r"  r   c                C   s8   t |}|  |kr|rt| S | S ttj| |S rN   )r/   r   r  r+   r   r   
DeviceCopy)r>   r   r  r8   r8   r9   	to_device  s   r$  c                 C   r  r  )r$  )r>   r   r8   r8   r9   _device_put#  r  r%  Tc	                 C   s   |p| j }t|}	|rtd| }
|durt|}t|	|||r!|
nd|d}	t| |||d|	}	tt|rAttt|d|d|	 |	S )z3A pointwise function that maps ops.{name} to inputs
libdevice_N)r   r   r   r   r   )rj   r   )__name__r&   r   r   rk   primsrK   )r{   namer   rj   r   r   r   r   use_libdevice_for_f64rL   fn_libdevicer8   r8   r9   register_pointwise(  s<   


r,  c                 C   s   t ||d}t| |}|S )Nr   )r
  r   )r{   pointwise_lowering_fnr   rL   r8   r8   r9   register_foreach_pointwiseR  s   
r/  )r   rj   c                    s  dd }t |ttfrt||}t |ttfrt||}| ||g t d  d tjd}dd t D }t|t	 fdd|D  D ]\}}| |< qFt
t D ]}t  | tjrqt | t |d	    |< qUt||d
 d	 t d |t d |S )Nc                  W   
   t j|  S rN   )r2   wherer   r8   r8   r9   rL   ^     
zwhere.<locals>.fnr   r   r  c                 S   r~   r8   r   r   r8   r8   r9   r?   j  r   zwhere.<locals>.<listcomp>c                    r   r8   r8   r   r   r8   r9   r?   k  r@   r   r  )rA   r   r]   constant_likerx   r   r   r   r   r   r   rl   r   r   r#   r   rB   rm   r   r   )r6   r   r   rL   r\   r   r   r>   r8   r   r9   r1  \  s&   
$
$
r1  c                  G   s   t | dkrt| d ttfrt| d  S ttdd | D g }g }| D ]$}| }t |t |ks?t	dd t
||D rDt||}|| q%|S )Nr   r   c                 S   r   r8   rm   r<   r8   r8   r9   r?   z  r@   z%broadcast_tensors.<locals>.<listcomp>c                 s   s4    | ]\}}|d kr|d kp|d ko|d kV  qdS r   Nr8   r=   r   r   r8   r8   r9   r     s    $
z$broadcast_tensors.<locals>.<genexpr>)rl   rA   rB   rC   r   r   reducer   rm   r   r   r   rz   )r   r   r  r>   sizesr8   r8   r9   r   u  s   
r   c                 C   s   | S rN   r8   rf   r8   r8   r9   nop  s   r9  
lift_freshc                 C   s   t | tsJ |d u rtt| jS tt|  |}tt |t	s%|fn|}g }t
|  D ]\}}||v rCtjjt|dsH|| q0||  krTt| |S | S Nr   )rA   r+   r*   r   r   r
   rl   rm   rD   rC   r   r3   r   r   evaluate_exprrc   Eqrz   view)r>   rp   dims	new_shapedsr8   r8   r9   squeeze  s   
rC  c                 C   s   t t| |S rN   )r  rC  )r>   rp   r8   r8   r9   squeeze_copy  r  rD  c                 C   2   t | |}t| tsJ t|tsJ |j| _| S rN   )rC  rA   r+   r   r>   rp   valr8   r8   r9   squeeze_  
   
rH  c                 C   2   t | rt| dtjdS td}t|tjd| S )NFr_   isinfr  rg   	full_likerG   rh   r&   r   r>   rL   r8   r8   r9   rK       rK  c                 C   rJ  )NFr_   isnanr  rL  rN  r8   r8   r9   rP    rO  rP  c                 C   $   t | rt| S td}t|| S )Nceilrg   r  r&   r   rN  r8   r8   r9   rR       rR  c                 C   rQ  )NfloorrS  rN  r8   r8   r9   rU    rT  rU  c                 C   rQ  )NroundrS  rN  r8   r8   r9   rV    rT  rV  c                 C   rQ  )NtruncrS  rN  r8   r8   r9   rW    rT  rW  c                 C   s   t | g\} t| tjrt| t|S t| tsJ t|ttfs$J t| 	 t|kr0| S t
jjt| 	 }|dkrM| t
jjt||  tt| jt|S r   )r   rA   r   r   r#   r   rC   r+   rB   rm   r3   r   r   	size_hintr1   
mark_reuser   )r>   r8  x_size_productr8   r8   r9   r     s   r   c                 C   sL   t |}|D ]}d||< q| }t|D ]\}}|dkr t||}qt||S )Nr   )rB   r   	unsqueezer   )r   shapebroadcast_dimensionsrB  broadcast_dimensionvidxr>   r8   r8   r9   broadcast_in_dim  s   


ra  c                 C   s   t | | S rN   )r   rm   r>   yr8   r8   r9   	expand_as  r  rd  c                    sP  t |   tt kr%tdgtt      t| t  } tt|  ks1J t |  }d}ttD ]}| dkrId}|| |  ||< q?|rat||  | 	 dS t
dd t D rrt| |S  fdd	}tjjt }|dkr| tjjt||  |  tj| 	 |  |t |d
S )Nr   Fr   Tr\   r   c                 s   s$    | ]\}}|d kp|d kV  qdS r5  r8   r6  r8   r8   r9   r        " zrepeat.<locals>.<genexpr>c                    sv   t | t ks
J t| } tt D ]"}| dkr6 | dkr*td| |< qt| | d | | |< q| S Nr   r   )rl   rB   r   rc   r   r   )r   r   old_sizerepeatsx_loaderr8   r9   r     s   zrepeat.<locals>.inner_fnr   )rB   rm   rl   rc   r   r>  r   emptyrb   r   r   r   r   r3   r   r   rX  r1   rY  r   r(   r   )r>   rj  new_sizezero_tensorr   r   old_size_productr8   rh  r9   repeat  s8    
rp  c                 C   s2   t | tsJ t |ttfsJ tt| j|S rN   )rA   r+   rB   rC   r-   r   r   )r>   r8  r8   r8   r9   r>  4  s   r>  c                 C   s6   t | tsJ t |ttfsJ tt| jt|S rN   )rA   r+   rB   rC   r'   r   r   )r>   r?  r8   r8   r9   permute=  s   rq              c                 C   s|   t | tsJ t| |d}|  | }tjjt	|| dr"d}tjjt	|| dr1d}tt
j| j||||S r   )rA   r+   _validate_dimrm   r3   r   r   r<  rc   Ltr   	SliceViewr   r   )r>   rp   startendstepdim_sizer8   r8   r9   slice_D  s   rz  c                    st  t |ts|f}t |ts|f} fdd|D }t  dkr%t S t|}t|}|dks5|dkr|dkr=td|dkr^|dkr^t t  g}t||d}t|t	  S ||krltd| d| |dd }|dd }t |d |d }	t|	||S |\t
jj   |d     fd	d
}
tj    |
  dS )z
    This is based on torch._refs.roll(), but uses ModularIndexing().

    We can't use the ref here because it is based on multiple calls to
    torch.cat() that this will result in terrible code.
    c                    s   g | ]}t  |qS r8   )rs  r=   rA  r   r8   r9   r?   ]      zroll.<locals>.<listcomp>r   r   z`shifts` requiredz*shifts and dimensions must align. shifts: z, dims: Nc                    s4   t | } t|   tdt| <  | S r;  )rB   r   rc   r   r   r   )a_loaderrp   sizerv  r8   r9   rL   |  s
   zroll.<locals>.fnr   )rA   r   r1   rm   r  rl   RuntimeErrorr>  rollrB   r3   r   r   evaluate_static_shaper   r(   r   r   rb   )r   shiftsr?  
len_shiftslen_dimsflatrolledtail_shifts	tail_dimsfirst_dim_rolledrL   r8   )r   r~  rp   r  rv  r9   r  P  sF   
	
r  c              	   C   s   t | trt | jtjr| j } |   t| s"td|  dt	| \}}t
|j|jdd |D dd |D t|p@d}tt||S )Nzunrealized as_strided(z, ...)c                 S      g | ]}t |qS r8   rc   r   r=   rB  r8   r8   r9   r?     r}  zas_strided.<locals>.<listcomp>c                 S   r  r8   r  r  r8   r8   r9   r?     r}  r   )rA   r+   r   r   BaseViewunwrap_viewr   is_storage_and_layoutr5   as_storage_and_layoutFixedLayoutr   r\   rc   r   ReinterpretView)r>   r  stridestorage_offsetstorage
old_layout
new_layoutr8   r8   r9   
as_strided  s   

r  c                 C   s$   t | tsJ t| |||j| _| S rN   )rA   r+   r  r   )r>   r  r  r  r8   r8   r9   as_strided_  s   r  c                 C   s   t | |||}t|S rN   )r  r  )r>   r  r  r  resultr8   r8   r9   as_strided_copy  s   r  c                    s   t dd | D r.| D ]}|  qt dd | D r&ttjg| R  \} }ttj| |S t| dkr:t| d S t| d |d}t	| dt
ji  fdd| D } ttj| |S )	Nc                 s   s    | ]
}|  tju V  qd S rN   )rb   rG   uint8r=   r  r8   r8   r9   r     s    zcat.<locals>.<genexpr>c                 s   s     | ]}t |jjd kV  qdS )rS   N)rl   layoutr  r  r8   r8   r9   r     s    r   r   rj   c                    s   g | ]}t | qS r8   r  )r=   ro   r_   r8   r9   r?     r}  zcat.<locals>.<listcomp>)r   r   require_channels_lastatencatfallback_handlerrl   r  rs  rx   r   r   r+   r   ConcatKernelr   )r   rp   r  rw   r8   r_   r9   r    s    
r  offsetdim1dim2c                    s   |   ttdtdtkfdd tjjt	|d}|r<t
t |  d}nt
t  | d}d |rT| df nd|f fddtD }||  fdd	}ttj| ||S )
N)r`  rankc                      s   d  d S )Nz(diagonal dimensions cannot be identical z, r8   r8   r  r  r8   r9   <lambda>      zdiagonal.<locals>.<lambda>r   )r   r   c                    s    g | ]\}}| fvr|qS r8   r8   )r=   r   rB  r  r8   r9   r?          zdiagonal.<locals>.<listcomp>c                    s   | d }dgt  }d}tD ]&}|kr | d  ||< q|kr-| d  ||< q| | ||< |d7 }q|t d ksBJ |S )Nr   r   r   r   )rl   r   )r`  diag_idxoriginal_idxcur_dimrA  base_idxr  r  num_dimsoriginal_shaper8   r9   	reindexer  s   
zdiagonal.<locals>.reindexer)rm   rl   r	   r   r3   r   r   r<  rc   rt  maxminr   rz   r+   r   GenericViewr   )r  r  r  r  offset_negative	diag_sizer8  r  r8   r  r9   diagonal  s&   
r  c                 C   s   t t| |||S rN   )r  r  )r  r  r  r  r8   r8   r9   diagonal_copy     r  c                 C   $   t | }t||||}t|| |S rN   )r  r  	mutate_to)r  srcr  r  r  r   r   r8   r8   r9   diagonal_scatter     
r  c                 C   s,   t ||  | }tt| |||d |S r;  )r-   handle_negative_indexrm   rC  rz  )r>   rp   r`  r8   r8   r9   select  s   r  c                 C   s   t | |d}tjj|  | }t|tjrtjj|}t|t	tj
fr1|g|| d |  }g }d}|D ]}|| }|t| ||| |}q7|S Nr   r   )rs  r3   r   r   r  rm   rA   rc   r   r]   r   rz   rz  )r>   r8  rp   x_sizer  rv  r  rw  r8   r8   r9   split  s   r  c                 C   s   t | ||S rN   )r  )r>   r8  rp   r8   r8   r9   split_with_sizes     r  c                 C   sJ   t | |d}tjj|  | }g }t|D ]}|t| || q|S r   )	rs  r3   r   r   r  rm   r   rz   r  )r>   rp   r  r  r   r8   r8   r9   unbind  s   r  c           
         s   |   }t|}t|| |dkrtt| d|dS tjj}|||   |	d t
|  | d }| |t|| |   g |d   || d d  |} fdd}	ttj| ||	S )Nr   )rw  r   c                    s:   | d |     }g | d   ||  d d R S )Nr   r   r8   )r`  dim_idxrp   rx  r8   r9   r  5  s   &zunfold.<locals>.reindexer)rm   rl   r	   rz  r[  r3   r   r   	guard_leqguard_ltr   rY  rX  r   r+   r   r  r   )
r>   	dimensionr  rx  r8  ndimr   new_dim_sizeout_sizer  r8   r  r9   unfold#  s   
(r  c                 C   s4   t | |d}t|  }||td t| |S r;  )rs  rB   rm   insertrc   r   r>  )r>   rp   r@  r8   r8   r9   r[  <  s   
r[  c                 C   rE  rN   )r[  rA   r+   r   rF  r8   r8   r9   
unsqueeze_D  rI  r  c                 C   sR   t |tsJ t|  }|dk r||| 7 }d|  kr$|| k s'J  J |S r   )rA   r]   rl   rm   )r>   rp   r  r  r8   r8   r9   rs  M  s    rs  r   c                 C   sT   t | |d}tjj|  | d }t| |d|}t| |||d }t|t|S )Nr   r   )	rs  r3   r   r   r  rm   rz  r   sigmoid)r>   rp   new_lenr   r   r8   r8   r9   gluV  s
   r  c               !   C   sj  t jjr2t jjjt jjjt jjjt jjjt	j
jt jjjg} tt jjjdtdtdtfdd}tt jjjjdtdtdtdtfdd}tt jjjjdtdtdtdtfd	d
}tt jjjdtdtdtfdd}tt jjjjdtdtdtdtfdd}tt jjjdtdtdtfdd}tt	j
jdtdtdtdtdtdtdtdtdtt dtdtdtdtd td!td"tf d#d$}tt jjjd d%dtd&td'td(tdtf
d)d*}tt jjjjd d%dtd+td&td'td(tdtfd,d-}	tt jjjd d%dtd&td'td(tdtf
d.d/}
t jjr,| t jjj tt jjjdtd0td1tdtfd2d3}t|  d S 	 d S )4Nr>   weightbiasc
           
      S   s$   t tj| |||||||||	
S rN   )r+   r   r   ConvolutionUnary)
r>   r  r  paddingr  dilationr  attrscalars	algorithmr8   r8   r9   convolution_unaryk  s   z5register_onednn_fusion_ops.<locals>.convolution_unaryr   c                 S   *   t tj| |||||||||	|
||S rN   )r+   r   r   ConvolutionBinaryr>   r   r  r  r  r  r  r  binary_attrbinary_alpha
unary_attrunary_scalarsunary_algorithmr8   r8   r9   convolution_binary  "   z6register_onednn_fusion_ops.<locals>.convolution_binaryc                 S   r  rN   )r+   r   r   ConvolutionBinaryInplacer  r8   r8   r9   convolution_binary_inplace  r  z>register_onednn_fusion_ops.<locals>.convolution_binary_inplacewr   c              
   S   s   t tj| |||||S rN   )r+   r   r   LinearUnary)r>   r  r   r  r  r  r8   r8   r9   linear_unary  s   z0register_onednn_fusion_ops.<locals>.linear_unaryrc  c              	   S      t tj| ||||S rN   )r+   r   r   LinearBinary)r>   rc  r  r   r  r8   r8   r9   linear_binary     z1register_onednn_fusion_ops.<locals>.linear_binaryc                 S   s&   t tj| |||||||||	|
S rN   )r+   r   r   ConvolutionTransposeUnary)r>   r  r  r  output_paddingr  r  r  r  r  r  r8   r8   r9   convolution_transpose_unary  s   z?register_onednn_fusion_ops.<locals>.convolution_transpose_unaryw0w1w2w3hxcxreversebatch_sizesmodehidden_size
num_layers
has_biasesbidirectionalbatch_firsttrainc                 S   s4   t tjtj| |||||||||	|
|||||S rN   )pytreetree_mapr+   r   r   MkldnnRnnLayer)r>   r  r  r  r  r  r  r  r  r  r   r  r  r  r  r  r8   r8   r9   mkldnn_rnn_layer  s*   z4register_onednn_fusion_ops.<locals>.mkldnn_rnn_layerr  packed_weightw_scalew_zpc                 S   s2   t tj| |||||||||	|
||||||S rN   )r+   r   r   QConvPointWisePT2E)r>   x_scalex_zpr
  r  r  r  r  r  r  r  o_inv_scaleo_zero_pointfp32_outputr  r  r  r8   r8   r9   qconvolution_unary  s*   z6register_onednn_fusion_ops.<locals>.qconvolution_unaryaccumc                 S   s<   t tj| |||||||||	|
|||||||||||S rN   )r+   r   r   QConvPointWiseBinaryPT2E)r>   r  r  r  accum_scaleaccum_zpr
  r  r  r  r  r  r  r  r  r  r  r  r   r  r  unary_algorithmmr8   r8   r9   qconvolution_binaryH  s4   z7register_onednn_fusion_ops.<locals>.qconvolution_binaryc                 S   r  rN   )r+   r   r   QLinearPointwisePT2E)r>   r  r  r
  r  r  r  r  r  r  r  r  r  r8   r8   r9   qlinear_unary~  r  z1register_onednn_fusion_ops.<locals>.qlinear_unarypacked_worig_wc                 S   s.   t tj| |||}|d urt||}|S rN   )r+   r   r   MKLPackedLinearrF   )r>   r  r  r   
batch_sizer  r8   r8   r9   mkl_packed_linear  s   
z5register_onednn_fusion_ops.<locals>.mkl_packed_linear)rG   _C_has_mkldnnr2   mkldnn_convolution_pointwise_convolution_pointwise_ _convolution_transpose_pointwise_linear_pointwiser  r	  defaultonednnqconv2d_pointwiser   r+   binaryrh   r   r]   qlinear_pointwisehas_mklrz   mkl_mkl_linearr;   )cpu_needs_realized_inputsr  r  r  r  r  r  r	  r  r  r  r   r8   r8   r9   register_onednn_fusion_ops`  s  
	!!
	
()	
3
!r1  c                    s   |rt    fdd}|S )Nc                     s$   t tjtjj g| R i |S rN   )r  r  r+   r   r   FallbackKernelru   r   kernelr8   r9   handler  s   z!fallback_handler.<locals>.handler)r   rF   )r5  add_to_fallback_setr6  r8   r4  r9   r    s   
r  c                   C      t d d S )NzjTorchinductor does not support code generation for complex operators. Performance may be worse than eager.)warningswarnr8   r8   r8   r9   _warn_complex_not_supported  s   r;  r   c                 C   s   |   r	t  dS dS )z0Do not support reading or writing to this tensorTF)
is_complexr;  r   r8   r8   r9   unsupported_input_tensor  s   r>  c                 C   s   t | rdS | jotjS )z2Do not support writing tensor but can read from itT)r>  is_cpur   disable_cpp_codegenr=  r8   r8   r9   unsupported_output_tensor  s   rA  r   c                 C   sd   | j tjju r	dS | j tjju rdS dd }t| j| jfd D ]}||ddr+ dS q || ddS )NFc                 S   sn   t | tjjs	dS d| jvrdS t| jd d D ]}t |tjjs#q|r-t|r, dS qt	|r4 dS qdS )NFrG  r   T)
rA   rG   fxNodemetar   _subclasses
FakeTensorrA  r>  )r   	is_outputrD  r8   r8   r9   check_skip_condition  s   
zCfallback_node_due_to_unsupported_type.<locals>.check_skip_conditionr   )rG  T)r   r  view_as_complexr(  lift_fresh_copyr   ru   r   )r   allow_cpu_inputsrH  rs   r8   r8   r9   %fallback_node_due_to_unsupported_type  s   rL  c                 C   s   | t vsJ d|  t| gr0|r0ttdr0tjjjr(dtjj_t	
d td|  dt|  |d ur=t| | t| d dt| S )Nz.both a fallback and a decomp for same kernel: CIFzmA make_fallback error occured in suppress_errors config, and suppress_errors is being disabled to surface it.zmake_fallback(a.  ): a decomposition exists, we should switch to it. To fix this error, either add a decomposition to core_aten_decompositions (preferred) or inductor_decompositions, and delete the corresponding `make_fallback` line. Get help from the inductor team if unsure, don't pick arbitrarily to unblock yourself.r  )r!   r"   rh   osgetenvrG   _dynamor   suppress_errorslogwarningAssertionErrorr;   rQ   r   r  )r5  layout_constraintr:  r8   r8   r9   make_fallback  s    




rV  c                 C   s$   d}| D ]}|| }qt |tjdS )z
    TorchInductor offset calculation differs from PyTorch eager offset
    calculation for random ops (tl.rand vs torch.rand). In future, we should
    strive for same impl for tl.rand and torch.rand.
    r   r_   tensorrG   int64)r\  numelrB  r8   r8   r9   philox_rand_offset  s   
r[  c           	         sd   t | | t j|  | |  fdd}tj| |t| d}t	| }||fS )Nc                    sV   t g tj}t g tj}t t | tj|}t ||}t | S rN   )r2   r   rG   int32rF   
index_exprrand)r   seed_index_exproffset_index_exprrand_index_exprr  r\   offset_loader
random_posseed_loaderr8   r9   r   8  s   zphilox_rand.<locals>.inner_fnr   )
r   r  FlexibleLayoutcontiguous_stridesmake_indexerr   r(   r   rB   r[  )	r  seedr  r  r   r\   r   random_values_nodeoffset_noder8   rb  r9   philox_rand+  s&   
rl  c              	   C   s,   t jrttjtjtj	| ||S t
d)Nz&should be handled in replace_random.py)r   fallback_randomr  r  r+   r   r   r2  r  native_dropoutrT  )r>   pr  r8   r8   r9   rn  R  s
   rn  c                 G   s>   t js|  tdksJ d|   tj| g|R   | S NcpuzTthis should be handled in decomps unless config.fallback_random or the device is CPU)r   rm  r   rG   r   r   r   InplaceBernoulliFallbackr>   ru   r8   r8   r9   
bernoulli_\  s   rt  c                 G   s4   t js|  tdksJ dtt| g|R  S rp  )r   rm  r   rG   r   rt  r  rs  r8   r8   r9   bernoulli_pf  s   ru  c                 C   s   t  rN   rT  rw   r8   r8   r9   _foobaro  s   rx  c                 C   r8  )Nz1using triton random, expect difference from eager)rR  info)saltr8   r8   r9   _warn_triton_randomt  r  r{  c                   C   s   t tjj d S rN   )r{  r3   r   creation_timer8   r8   r8   r9   warn_triton_randomy     r}  c                  O   ,   t js|dd d urt| i |S tdN	generatorz-should have been handled in replace_random.py)r   rm  getfallback_randrT  r3  r8   r8   r9   r^       r^  c                  O   r  r  )r   rm  r  fallback_randnrT  r3  r8   r8   r9   randn  r  r  c                 C   s   t |}t j| |S rN   )r   get_stride_orderExternKernelrequire_stride_order)input_tensorr  stride_orderr8   r8   r9   inductor_force_stride_order  s   
r  c                 C      t d)Nz.should be handled in fuse_seed_creation_pass()rv  )r   r8   r8   r9   inductor_seed     r  c                 C   s   t   tt| t|S rN   )r}  r+   r   r   RandomSeedsr/   )countr   r8   r8   r9   inductor_seeds  s   r  c                    s(    fdd}t j  |g dS )Nc                    s   t   S rN   )r2   	load_seedget_namerw  r   seedsr8   r9   r        z&inductor_lookup_seed.<locals>.inner_fnr   )r(   r   r   rb   )r  r   r   r8   r  r9   inductor_lookup_seed  s   r  r  r  ri  r  c                   s   t jrJ  dv sJ g | } tj}| }tj||| tj| |d	 |
  fdd}tj|||g | d}|  |S )N)r^  r  r  c                    s"   t t g t| tjS rN   )rK   r2   r]  rG   r\  r   r  rd  re  r8   r9   r     s   z!inductor_random.<locals>.inner_fnr   )r   rm  rG   float32r   r   r  rf  rg  rh  r   r(   r   r   )r  ri  r  r  r\   r   r   r  r8   r  r9   inductor_random  s(   
r  lowhighc                   sp   t jrJ g |}tj}| }tj|||tj||d	 |
  fdd}tj|||g |dS )Nr  c                    s"   t g t | tj S rN   )r2   	randint64r]  rG   r\  r   r  r  rd  re  r8   r9   r     s   z"inductor_randint.<locals>.inner_fnr   )r   rm  rG   rY  r   r   r  rf  rg  rh  r   r(   r   )r  r  r  ri  r  r\   r   r   r8   r  r9   inductor_randint  s"   
r  	out_int32rightr  
boundariesr  r  c                   s   t   dks
J t| rt sttjdd|  |dS      d   }|  }|  |r:t	j
nt	j fdd}tj|||  dS )	Nr   F)r7  r  r   c                    s"   | }t |  }|S rN   )r2   	bucketizer  )r   rG  r   r  boundaries_sizeindex_dtypeinput_loaderr  r8   r9   r     s   zbucketize.<locals>.inner_fnr   )rl   rm   r%   r  r  r  r   r   r   rG   r\  rY  r(   r   )r  r  r  r  boundaries_loaderr   r   r8   r  r9   r    s$   r  c                 O   $   t tjdd ||f\}}||fS )Nc                 S      t j| S rN   )r   r  require_stride1r=  r8   r8   r9   r        zrequire_dense.<locals>.<lambda>r  tree_map_onlyr   IRNoderw   ru   r   r8   r8   r9   require_dense     r  c                 O   r  )Nc                 S   r  rN   )r   r  require_contiguousr=  r8   r8   r9   r     r  z$require_contiguous.<locals>.<lambda>r  r  r8   r8   r9   r    r  r  c                 O   r  )Nc                 S   r  rN   )r   r  r  r=  r8   r8   r9   r  '  r  z'require_channels_last.<locals>.<lambda>r  r  r8   r8   r9   r  %  r  r  c                    sF   dd  t  fddt|jD } fdd| D }||fS )Nc                 S   s2   t | tjrt|jd  }tj| |S | S )NrG  )rA   r   r  r  rD  r  r  r  )rs   fx_argr  r8   r8   r9   apply_constraint-  s   z1constrain_to_fx_strides.<locals>.apply_constraintc                 3   s    | ]
\}} ||V  qd S rN   r8   )r=   rs   r  )r  r8   r9   r   3  s    
z*constrain_to_fx_strides.<locals>.<genexpr>c                    s"   i | ]\}}| |j | qS r8   )r   )r=   kr_  r  fx_noder8   r9   r   6  s   " z+constrain_to_fx_strides.<locals>.<dictcomp>)rC   r   ru   r   )r  ru   r   r8   r  r9   constrain_to_fx_strides,  s   
r  ztorchvision::roi_align)r:  c                 C   sn   |}|   |  krt||   }|  | kr t||  }|  | kr3t||  }t|S t|S rN   )r   r$  rb   r   rm   r   r  )selfr  non_blockingr>   r   r8   r8   r9   r    s   r  )memory_formatc                C   s&   t j|  |  |  t|  dS Nr   )r(   r   r   rb   r   rB   rm   )r>   r  r8   r8   r9   r    s   
r  rJ  c                   s(    fdd}t jt| || gdS )Nc                    s   t j| d    dS )Nr   r_   r2   r]  r   r\   rv  rx  r8   r9   rL        ziota.<locals>.fnr   )r(   r   r/   )lengthrv  rx  r\   r   requires_gradrL   r8   r  r9   iota  s   
r  rp   r   c                    s   |   |  ks
J |  t|  d tjjtdr'| 	    tjj
d tjj| 	    tt| | 	 }|  fdd}tj|  |   |t| 	 dS )Nr   c              	      s6   t t t |   tjt tj| | S rN   )r2   r1  eqr]  rG   r\  r`  rp   r   
src_loaderrk  r8   r9   r   %  s   z select_scatter.<locals>.inner_fnr   )rb   r   rs  r3   r   r   r<  rc   rt  rm   r  r  r   r[  r(   r   r   rB   )r>   r  rp   r   r   r8   r  r9   select_scatter  s    

r  c                    s(    |  ks
J  t d    d ur-tjjt	dr- d ur@tjjt	dr@ d u rFdd u sVtjj
   rXt }tt t| < t||}|  fdd}tj   |t dS )Nr   c              
      s4  dkrkrdkr| S t |  tj}t|  t|    < g }dkr?|t |t t	tj krT|t 
|t t	tj dkrt|t t t|   dtjt dtjj |sxJ tt j|}t | fddtrdnd}t ||| S )Nr   r   c                          S rN   r8   r8   )src_idxr  r8   r9   r  p      z1slice_scatter.<locals>.inner_fn.<locals>.<lambda>        )r2   r]  rG   rY  rB   r   rz   gerc   r   ltr  r   constantr   r7  and_maskedrg   r1  )r`  idx_dimmasksrc_valrp   ry  rw  r  rv  rx  r>   rk  )r  r9   r   K  sR   zslice_scatter.<locals>.inner_fnr   )rb   r   rs  rm   r3   r   r   r<  rc   rt  statically_known_leqrB   r   r   r(   r   r   )r>   r  rp   rv  rw  rx  src_sizer   r8   r  r9   slice_scatter7  s.    
.
r  c                 C   s*   t | ttfrt| dkrt| d S | S r   )rA   rB   rC   rl   _unwraprf   r8   r8   r9   r    s   r  r\   r   r  
pin_memoryc                   s  t |d tjfv d|  t | d tt tr ptjnp%t g }t tj	r6 fdd}nBt t
tfrE fdd}n3t dksZt d t
tfrlt dkrl|tt   fdd}ntjtj |d	S tjt|||d
S )Nlayout=r  c                       t  S rN   r  r   r   r\   r8   r9   r     r  ztensor.<locals>.inner_fnc                    r  rN   r2   r  r   r  r8   r9   r     r  r   rW   c                    s8    fdd t dkrtdS  dt S )Nc              	      sr   | |k sJ ||  dkrt |  S ||  d |  }t t t d tjt |tj | | ||S )Nr   r   r   )r2   r  r1  r  r]  rG   rY  )rv  rw  mid)binary_searchr   r\   r   r8   r9   r    s   z/tensor.<locals>.inner_fn.<locals>.binary_searchr   )rl   r2   r  r   r  )r  r   r9   r     s   re  r   )r:   rG   stridedrA   r  r]   rY  get_default_dtyperc   r   r   rl   rz   r   r3   r   add_tensor_constantrX  r(   r   r/   )r   r\   r   r  r  r   r   r8   r  r9   rX    s,   *rX  c                 C   s@   t | tr|d urt| |} |d urt| |} | S t| ||dS )Nre  )rA   r+   r   r$  rX  )r   r\   r   r8   r8   r9   	as_tensor  s   


r  c                 C      t | tjdS )Nr_   rW  r   r8   r8   r9   long_tensor  r  r  c                 C   s   t  S rN   )r   DynamicScalarr  r8   r8   r9   _local_scalar_dense  r  r  c                    s   | t | ttfstdrjt ttfr  fdd}n"t tjr. fdd}nt dks8J 	 fdd}t
j| |t|dS )Nr   c                       t  S rN   r  r   r\   r   r8   r9   r     r  z_full.<locals>.inner_fnc                    r  rN   r  r   r  r8   r9   r     r  r   c                    s    g S rN   r8   r   )value_loaderr8   r9   r     s   r   )rA   r]   r   rk   r   rc   r   rl   rm   r   r(   r   rB   )
fill_valuer   r\   r  r   r8   )r\   r   r  r9   _full  s    r  c                 K   s   t t|| fi |S rN   create_tensor_liketensor_constructor)r>   r  r   r8   r8   r9   rM       rM  c                    s    d d d d dd d fdd
}|S )NF)namesr\   r   r  r  r  c                    s   t | d u d t |d tjfv d|  t | d t|}|p#t }t|dkr;t|d tttj	fr;t|d }dd |D }t
 |||S )Nnamed tensorsr  r  r   r   c                 S   r  r8   r  r  r8   r8   r9   r?   
	  r}  z5tensor_constructor.<locals>.inner.<locals>.<listcomp>)r:   rG   r  r/   r  rl   rA   rB   rC   Sizer  )r  r\   r   r  r  r  r  r  r8   r9   r     s   	"z!tensor_constructor.<locals>.innerr8   )r  r   r8   r  r9   r    s   r  )r  r\   r  r   r  r  c                 G   sX   t | d u d t|}t|dkr"t|d tttjfr"t|d }t|d ||||dS )Nr  r   r   r\   r  r   r  )	r:   r/   rl   rA   rB   rC   rG   r  empty_strided)r  r\   r  r   r  r  r  r8   r8   r9   rl  	  s   
"rl  c                    s   dddddd fdd
}|S )zZ
    Shim to convert X_like(...) into X(...).  For example zeros_like() into zeros().
    NF)r\   r   r  r  r  c                   sj   t | d t |d tjfv d|  |d u r|  }nt|}|p%|  }t|  } |||||dS )Nr  r  r  )r:   rG   r  rb   r`   r   rB   rm   )r>   r\   r   r  r  r  r  creation_fnr8   r9   _constant_like(	  s   

z*create_tensor_like.<locals>._constant_liker8   )r  r  r8   r  r9   r  #	  s   
r  c                 C   s   t t| S rN   r  r  r8   r8   r9   r3  :	  r  r3  c                    s   d d d d d fdd
}|S )Nr  c                   sp   t |ttfs	J t| d t|d tjfv d|  t|p#|  }|p)|  }dd |D }t	 |||S )Nr  r  c                 S   r  r8   )rc   r   r  r8   r8   r9   r?   L	  r}  z7new_constant.<locals>._new_constant.<locals>.<listcomp>)
rA   rB   rC   r:   rG   r  r`   rb   r   r  r>   r  r\   r  r   r  r  r8   r9   _new_constantD	  s   z#new_constant.<locals>._new_constantr8   )r  r  r8   r  r9   new_constantC	  s   r  r  c                C   s4   |d u r|   }|d u r|  }t|d ||||dS Nr  rb   r   r  r   r8   r8   r9   	new_emptyR	  s   r  c                C   s   t | ttfs	J t |tttd fsJ t| d t|d tjfv d|  t|p/t }|p7t	dj
}td||| d}|  |jj}dgt|  |j_t |tjsYJ dd | D } |ridd |D ntj| }tj||| |d	|_|S )
Nr  r  r  r   )r  r   r\   r  c                 S   r  r8   r  r  r8   r8   r9   r?   m	  r}  z!empty_strided.<locals>.<listcomp>c                 S   r  r8   r  r  r8   r8   r9   r?   o	  r}  )r   r\   r  r  )rA   rB   rC   r   r:   rG   r  r`   r  rX  r   r  r   r   rl   r   r   ComputedBufferrf  rg  r  r  )r  r  r\   r  r   r  	pointwisebufferr8   r8   r9   r  ]	  s.   
r  c                C   s4   |d u r|   }|d u r|  }t||||||dS r  r  )r>   r  r  r\   r  r   r  r8   r8   r9   new_empty_strided|	  s   r	  c                 C   s2   dd |D }t tt||jd}tj| |S )Nc                 S      g | ]	}t jj|qS r8   )r3   r   r   rX  r  r8   r8   r9   r?   	      z copy_strided.<locals>.<listcomp>)key)sortedr   rl   __getitem__r   r  r  )r>   r  r  r8   r8   r9   copy_strided	  s   r  c                 K   s:   | d}|d ur|ntt||d< t|| fi |S )Nr\   )r  r   r   r  )r  r  r   r\   r8   r8   r9   full	  s   
r  c                    s   t | tsJ | tjksJ |  tdk}t|  | |  |  fdd}t	j
|  |  || dS )Nr   c                    s4   t | } t| dkrt|   |  < | S r   )rB   rl   r2   indirect_indexingr  rp   index_loaderr  rk  r8   r9   rL   	  s   zgather.<locals>.fnr   )rA   r+   rb   rG   rY  rm   rl   rs  r   r(   r   r   )r>   rp   r   sparse_gradr  rL   r8   r  r9   gather	  s   r  c                    s   |rJ t | tsJ t |tsJ dt| v sJ |  |  t| |  g | dd   fdd}tj| 	 |  |dS )Nr]   r   c                    s\   t | t ksJ |  d  | d  }t|d gg | d   }|S )Nz != r   )rl   r2   r  )r`  	var_index
weight_idxindices_loaderindices_ndimrm  weight_loaderweight_sizer8   r9   rL   	  s   "
zembedding.<locals>.fnr   )
rA   r+   strrb   r   rl   rm   r(   r   r   )r  r   padding_idxscale_grad_by_freqsparserL   r8   r  r9   	embedding	  s    r!  c           
         sX  t dd  D sJ ddd  D  tdd  D r"tddd t D }t|d	ks5J d
d gt  }t|t fdd|D  D ]\}}| |krXtd|||< t| }qJd	}t	|}|r}|d d u r}|
  |r}|d d u sq|r|d	 d u r|
d	 |d7 }|r|d	 d u stdd |D rtd|| }	|||	fS )Nc                 s   s4    | ]}|d ur|  tjtjtjtjfv V  qd S rN   )rb   rG   rY  r\  rh   r  r   r8   r8   r9   r   	  s    z.check_and_broadcast_indices.<locals>.<genexpr>z)indices must be int64, byte or bool. Got c                 S      g | ]
}|d ur|  qS rN   r   r   r8   r8   r9   r?   	      z/check_and_broadcast_indices.<locals>.<listcomp>c                 s   s,    | ]}|d ur|  tjtjfv V  qd S rN   )rb   rG   rh   r  r   r8   r8   r9   r   	  s    "zFallback for bool indicesc                 S   r~   r8   r   r   r8   r8   r9   r?   	  r   r   z"requires at least 1 non-None indexc                    r   r8   r8   r   r   r8   r9   r?   	  r@   z.Fallback when indices is on a different devicer   r   c                 s   s    | ]}|d u V  qd S rN   r8   r   r8   r8   r9   r   	  r   z.Fallback when None is in the middle of indices)r   r   r5   r   rl   r   r   r   rm   rB   pop)
r   r   
valid_idxsnew_indicesr   r>   
output_dimstart_offsettmp
end_offsetr8   r$  r9   check_and_broadcast_indices	  s<   
$

r,  c                    s   t ttfs	J |  t|  \dd D }dd D t|d |  fddttD dv rIdvrIt	dg d  t d   fdd}t
j|  |  |d	S )
Nc                 S   r"  rN   r4  r   r8   r8   r9   r?   	  r#  zindex_impl.<locals>.<listcomp>c                 S   r"  rN   r   r   r8   r8   r9   r?   	  r#  r   c                        g | ]} | d ur| qS rN   r8   r   r   r  r8   r9   r?   
  r  z0index is out of bounds for dimension with size 0c                    sn   t  t ks
J t t ksJ  fddtD }g  d  | d  }|S )Nc                    ,   g | ]\}}t j| | d qS r   r2   r  r=   loaderr  )r   r+  r`  r)  r8   r9   r?   
      z*index_impl.<locals>.fn.<locals>.<listcomp>rl   r   )r`  	new_index)r   r+  indexed_sizeindices_loadersoutput_sizer)  rk  r  r9   rL   
  s    zindex_impl.<locals>.fnr   )rA   rB   rC   r   r,  r   rm   r   rl   
IndexErrorr(   r   rb   )r>   r   r   indices_sizesrL   r8   )	r   r+  r8  r   r9  r:  r)  rk  r  r9   
index_impl	  s4   

r=  c                 C   s<   zt | |ddW S  ty   |   ttj| | Y S w NTr1  )r=  r5   r   r  r  r   r>   r   r8   r8   r9   r   !
  s   c                 C   r  NFr1  )r=  r?  r8   r8   r9   _unsafe_index+
  r  rA  c                 C      t t| |||S rN   )
index_put_r  r>   r   r   
accumulater8   r8   r9   	index_put8
  r  rF  c                 C   s   t t| |||ddS r@  )index_put_impl_r  rD  r8   r8   r9   _unsafe_index_put=
  s   rH  c                 C   sB   |  |   krt||   }|rt| |}t| t|d || S r   )r   r$  rF   r  r1  )r  r   r   rE  r8   r8   r9   index_put_as_masked_fillB
  s
   
rI  c                 C   s4   t |r|du st rdtj_t| ||| | S ra   )r%   rG   $are_deterministic_algorithms_enabledr3   r   disable_cudagraphsr   IndexPutFallbackr  r   r   rE  r8   r8   r9   index_put_fallbackJ
  s
   rN  c                 C   s   t | |||ddS r>  )rG  rM  r8   r8   r9   rC  S
  r  rC  c                    s  |  dkr9tdkr9d  tjtjhv r9d }tt| t|  D ]}t|d}q)t	| |g||S t
 rDt| ||S D ]}|d ur_| tjtjhv r_t| ||  S qF|  t}|  tjtjhv r|dkr|t| dg} t| ||} |dkrt| g } | S t||  }zt|  \W n ty   t| || Y S w dd D }	dd D t| tsJ |   |dkrt| dg} t|	d }
g d  |
t|	 d  fddttD t|} fdd	}tj|  |  | ||rd
nd d}td t| |}tj||_|dkr>t| g } | S )Nr   r   r   c                 S   r"  rN   r4  r   r8   r8   r9   r?   
  r#  z#index_put_impl_.<locals>.<listcomp>c                 S   r"  rN   r   r   r8   r8   r9   r?   
  r#  c                    r-  rN   r8   r   r.  r8   r9   r?   
  r  c                    sV   t  t ks
J  fddtD }g  d  | d  }|S )Nc                    r/  r0  r2  r3  )r   r+  r   r)  r8   r9   r?   
  r5  z;index_put_impl_.<locals>.output_indexer.<locals>.<listcomp>r6  )r   r7  )r   r+  expected_vals_sizer8  r9  r)  r   r9   output_indexer
  s    z'index_put_impl_.<locals>.output_indexer
atomic_addr   r\   r   r   rP  scatter_mode) 	get_numelrl   rb   rG   rh   r  r   rm   r[  rI  rJ  rN  rY  r>  r   r,  r   r5   rA   r+   r   rB   r   r   Scatterr   r  MutationLayoutr3   r   register_bufferr)  )r  r   r   rE  r   r  rw   r   x_ndimr<  r:  rP  scatterr  r8   )r   r+  rO  r8  r   r9  r)  r  r9   rG  X
  s   




rG  c                 C   r  rN   )r  r  copy_)r  r  r  r  r  r   output_viewr8   r8   r9   as_strided_scatter
  r  r\  c                 K   s   t t| |||fi |S rN   )scatter_r  )r>   rp   r   r  r   r8   r8   r9   rY  
  r  rY  r7  include_selfr7  r_  c             	   C   s`   | dkrdnd}|d |hvs ||kr|  tjtjhv s t r.tj| ||||||d |S d S )Naten.scatter_rF   sumr^  )rb   rG   rh   rY  rJ  r   ScatterFallback)rL   r  rp   r   r  r7  r_  	reduce_tyr8   r8   r9   scatter_fallback
  s   
rd  r7  c                C   sR   |dv sJ t d| ||||d}|r|S |dkrd}n|dkr!d}t| ||||S )N>   NrF   multiplyr`  re  rF   ra  rf  prod)rd  scatter_reduce_)r  rp   r   r  r7  fallback_resultr8   r8   r9   r]  
  s   r]  c                 C   rB  rN   )scatter_add_r  r>   rp   r   r  r8   r8   r9   scatter_add
  r  rl  c                 C   s   t t| |||dS )Nra  rh  r  rk  r8   r8   r9   rj  
     rj  c                 K   s   t t| ||||fi |S rN   rm  )r>   rp   r   r  reduction_typer   r8   r8   r9   scatter_reduce
  s   rp  )r_  c             	      s  |dv sJ t d |||d}|r|S ttsJ dt| v s&J t }|dkr6tdgttrIt dkrItdgt|tr\t| dkr\t|dg}t  	  |
 ttrr
 nd  fdd}fd	d
}	dd }
|stj  fdd| |d d}td t|}tj||_tj  |	| ||
|d}td t|}tj||_|dkrtg S )N>   Nra  amaxaminmeanrg  zaten.scatter_reduce_r^  r]   r   r   c                    s@     }t|}t| }t| |dkrdn|  | < |S r  )rm   rl   rB   r2   r  )r`  r\  r  indirect_idx)rp   r  r  r8   r9   rP     s   z'scatter_reduce_.<locals>.output_indexerc                    s   r| S t   S rN   r2   r  rb   r  )r  r  r  r8   r9   rL   *  s   zscatter_reduce_.<locals>.fnc                 S   s   | dkrdS | d u sJ d S )Nra  rQ  r8   re  r8   r8   r9   backend_reduce_str1  s   z+scatter_reduce_.<locals>.backend_reduce_strc                    s   t d  S r   ru  r   )r  r8   r9   r  >  r  z!scatter_reduce_.<locals>.<lambda>rR  )rd  rA   r+   r  rb   rl   rm   r>  rs  r   r   r   rU  r   r  rV  r3   r   rW  r)  )r  rp   r   r  r7  r_  ri  r  rP  rL   rv  zero_outr  rY  r8   )rp   r  r  r  r  r9   rh  
  sx   




rh  scales_x.nc           	         s   |    |  |   d   |  d   }dd  D  t|ks)J |}dd t |D tD ]
\}}|rC||< q9dd  fdd}tj|  | 	 |g ||dS )	Nc                 S   r
  r8   )r3   r   r   r  r   r8   r8   r9   r?   h  r  z&upsample_nearestnd.<locals>.<listcomp>c                 S   s   g | ]\}}|| qS r8   r8   )r=   r   or8   r8   r9   r?   m  s    c                 S   sB   t | tj} t | t |tj} t | tj} t j| |ddS r@  )	r2   r]  rG   r  r   r  r   r\  r  )r>   scaler  r8   r8   r9   scale_fnr  s   z$upsample_nearestnd.<locals>.scale_fnc                    sB   |  d  }| d   }g |fddt | D S )Nc                    s   g | ]\}}} |||qS r8   r8   )r=   r   rB  r  )r|  r8   r9   r?   |  r   z2upsample_nearestnd.<locals>.fn.<locals>.<listcomp>)r   )r`  r>   r   i_sizesry  r|  scalesrk  r8   r9   rL   x  s
    zupsample_nearestnd.<locals>.fnr   )
realize_hintr   rm   rl   r   r   r(   r   r   rb   )	r>   r:  rx  ry  batcho_sizesr   r{  rL   r8   r}  r9   upsample_nearestnda  s(   
r  r  c                 C   s   t | ||fddS )Nr   ry  r  )r>   r:  r  r8   r8   r9   upsample_nearest1d  r  r  scales_hscales_wc                 C   s   t | |||fddS )Nr   r  r  )r>   r:  r  r  r8   r8   r9   upsample_nearest2d  s   r  scales_dc                 C   s   t | ||||fddS )NrR   r  r  )r>   r:  r  r  r  r8   r8   r9   upsample_nearest3d  s   r  c                    s   t  fdd|D S )Nc                 3   s    | ]	}t | V  qd S rN   r  r   r_   r8   r9   r         z$_create_constants.<locals>.<genexpr>rC   )r\   ru   r8   r_   r9   _create_constants  s   r  align_cornersc              
      s   |    |  |  \}}	
|\}}tjj		tjj

dd ddd}	dd dd d	d
 fddfdd|		| ||	
| |dd  	
f
dd}
tj| 	 | 
 |
||t|t|gdS )Nc                 S   s   | t t jjkrt jS t jS rN   )rG   r  r\  r  rY  )maxvalr8   r8   r9   get_int_dtype  s   z1upsample_bicubic2d_default.<locals>.get_int_dtypec                 S   s@   |r|dkr| d |d  S dS |d ur|dkrd| S | | S rg  r8   )in_sizer  r  r{  r8   r8   r9   compute_scale  s    z1upsample_bicubic2d_default.<locals>.compute_scalec                 S   sJ   t |tj}t | tj} |rt | |S t dtj}| ||  | S )N      ?)r2   r]  rG   r  r  r   )r{  	dst_indexr  dst_index_iehalfr8   r8   r9   compute_source_index  s   z8upsample_bicubic2d_default.<locals>.compute_source_indexc                 S   s8   t |d |d dtjd\}}}||  | |  |  | S )Nr   rR   r   r_   r  rG   r  )r>   A_Ap2_Ap3_1r8   r8   r9   cubic_convolution1  s    z6upsample_bicubic2d_default.<locals>.cubic_convolution1c                 S   sD   t |d| d| d| tjd\}}}}||  | |  | |  | S )NrS   rT   rW   r_   r  )r>   r  _A_4A_5A_8Ar8   r8   r9   cubic_convolution2  s   z6upsample_bicubic2d_default.<locals>.cubic_convolution2c                    sb   d}t dtj}t | ||} | |}t || } ||}t |||}||||fS )Ng            ?)r2   r  rG   r  rF   sub)r   r  r  c0c1x2c2c3)r  r  r8   r9   get_cubic_upsample_coefficients  s   

zCupsample_bicubic2d_default.<locals>.get_cubic_upsample_coefficientsc                    sH    |}| d |d  | d |d   | d |d   | d |d   S )Nr   r   r   rR   r8   )xsr   cs)r  r8   r9   cubic_interp1d  s   @z2upsample_bicubic2d_default.<locals>.cubic_interp1dc                 S   s   t |t || S rN   )r2   maximumminimum)r_  r  r  r8   r8   r9   clamp     z)upsample_bicubic2d_default.<locals>.clampc           
         s   | \ }}
|}t |}t ||
|}t |}t ||} 	fddt |d t |d tfdddD }tfdddD fdd	tfd
d|D }	|	|S )Nc                    sr   t dtj}t d tj}t d tj}t j| ||dd}t j|||dd} ||gS )Nr   r   Fr1  )r2   r  rG   r\  r  )fyrB  _0iHm1iWm1iyix)cr  iHiWry  rk  r8   r9   load_bounded  s   z<upsample_bicubic2d_default.<locals>.fn.<locals>.load_boundedr   c                 3       | ]	}t  |V  qd S rN   r2   rF   r=   ofs)r  r8   r9   r     r  z9upsample_bicubic2d_default.<locals>.fn.<locals>.<genexpr>)r   r   r   r   c                 3   r  rN   r  r  )r  r8   r9   r     r  c                    s"   t  fddD }|S )Nc                 3   s    | ]} |V  qd S rN   r8   r<   )r  rc  r8   r9   r     r   zOupsample_bicubic2d_default.<locals>.fn.<locals>.get_x_interp.<locals>.<genexpr>r  )rc  coeffs_x)r  ixs_ofsr  t_x)rc  r9   get_x_interp   s   
z<upsample_bicubic2d_default.<locals>.fn.<locals>.get_x_interpc                 3   s    | ]} |V  qd S rN   r8   )r=   rc  )r  r8   r9   r     r   )r2   rU  r  r   rC   )
r`  oyoxreal_xin_xreal_yin_yt_yiys_ofscoeffs_y)
r  r  r  r  r  height_scaler  r  width_scalerk  )r  r  r  r  r  r  ry  r  r9   rL     s   

	
z&upsample_bicubic2d_default.<locals>.fnr   rN   )r  r   rm   r3   r   r   r  r(   r   r   rb   rc   r   )r>   r:  r  r  r  NCoHoWr  rL   r8   )r  r  r  r  r  r  r  r  r  r  r  r  rk  r9   upsample_bicubic2d_default  s.   
	 r  c              	      s   t |dksJ |\}}|  |  ^ } tjj  tjjdd  fdd}tj| 	 | 
 |g |t  | t | dS )NrS   c                 S   sj   |}t |d tj}t | tj} t | t |tj} t |t t |t | } t j| |ddS )Nr   Fr1  )r2   r  rG   r\  r]  r  absr  )r>   r  r  size_numr8   r8   r9   reflect  s    z!reflection_pad2d.<locals>.reflectc                    s8   | ^ }}}| }|}g |||S rN   r8   )r`  r   r>   rc  hleftr  topr  rk  r8   r9   rL   !  s   zreflection_pad2d.<locals>.fnr   )rl   r   rm   r3   r   r   r  r(   r   r   rb   rc   r   )r>   r  r  botr  rL   r8   r  r9   reflection_pad2d  s   *r  c                    s   t |dksJ |\ | ^ }tjjd tjjd |  |  ^ }}} fdd}tj| 	 | 
 |t| dS )NrS   r   c                    s  | ^  }} fdddd | |	 | 	| }}d  | d 	 | }}d  f}d	 
 f}t ||}	t |	fddd	dfd
d	}
|
|||d	f |
|||
 d f |
||df| |
|| d f| |
|||df|d	f |
|||df|
 d f |
||| d f|d	f |
||| d f|
 d f S )Nc                    s   g  | |S rN   r8   rb  )r   grad_loaderr8   r9   load_from_output=  r  z?reflection_pad2d_backward.<locals>.fn.<locals>.load_from_outputc                 S   sP   | \}}}t |tj}t |tj}t |tj}t t ||t ||S rN   )r2   r]  rG   r\  rY  r  r  le)index_ranger   lbubr8   r8   r9   index_range_condition@  s
   
zDreflection_pad2d_backward.<locals>.fn.<locals>.index_range_conditionr   r   c                      s
    S rN   r8   r8   )center_xcenter_yr  r8   r9   r  \     
 z7reflection_pad2d_backward.<locals>.fn.<locals>.<lambda>r  c                    s   |d |d k }t |tr|rd S |}|d ur2|d |d k }t |tr*|r*d S t||}t| fddd}t|d S )Nr   r   c                      s
    S rN   r8   r8   )r  out_xout_yr8   r9   r  l  r  zKreflection_pad2d_backward.<locals>.fn.<locals>.accumulate.<locals>.<lambda>r  )rA   rh   r2   r  r  rF   )r  r  index_range1index_range2upper_less_than_lower1r6   upper_less_than_lower2g)gradr  r  )r  r  r9   rE  ^  s   z9reflection_pad2d_backward.<locals>.fn.<locals>.accumulater   rN   )r2   r  r  )r`  r>   rc  top_reflect_xleft_reflect_ybot_reflect_xright_reflect_yrange_cxrange_cyr6   rE  r  r  r  r  r  r  r  )r   r  r  r  r  r  r9   rL   :  s0   """$z%reflection_pad2d_backward.<locals>.fnr   )rl   rm   r3   r   r   r  r   r(   r   r   rb   rB   )grad_outputr>   r  rw   h_gradw_gradrL   r8   r  r9   reflection_pad2d_backward/  s   B
r  c                    s:   |   |   fdd}tj|  |  |dS )Nc                    sF   t | } t| tksJ  D ]}| d | |  | |< q| S r;  )rB   rl   )r`  rp   r?  r8  rk  r8   r9   r4    s
   zrev.<locals>.loaderr   )r   rm   r(   r   r   rb   )r>   r?  r4  r8   r  r9   rev  s   r  c              	      st  t |d dks
J tdd |D rt| S |  }tttt|d d d |dd d  t |t   g  D ]\}}t|tj	rP|j
rPtjj|n|}||f q<t|d  }g t |d  D ]\\}}	}
|
 |t|
| |	  qmt |t |ksJ t|   fddfdd	}|  tj|  |  ||d
S )Nr   r   c                 s   r   r   Nr8   )r=   ro  r8   r8   r9   r     r   z"constant_pad_nd.<locals>.<genexpr>r   c                    s~   g }t  d  D ]\}\}}}|dkr|t|d |dkr+|t|| qttj|}t| fddS )Nr   c                      r  rN   r8   r8   )r   rk  r8   r9   r    r  z/constant_pad_nd.<locals>.mask.<locals>.<lambda>)	r   rz   range_mask_lowrange_mask_highr   r7  r2   r  r  )r   r  r`  r  r  r  )boundsr  
mask_sizesry  rk  r   r9   r    s   "zconstant_pad_nd.<locals>.maskc                    sZ   t | d  }t| d   D ]\}\}}|||  qt|t| ks)J |S rN   )rB   r   rz   rl   )r   r7  r`  r  r  )bounds_precompr  ry  r8   r9   	offset_fn  s
   z"constant_pad_nd.<locals>.offset_fnr   )rl   r   r  rm   rB   r   r   rA   rc   r   r   r3   r   r   lookup_precomputed_sizerz   r   r   rb   r   r(   r   r   )r>   r  r  r8  lr  	l_precompr:  r  r  r  r  r8   )r   r  r  r  r  ry  rk  r9   constant_pad_nd  s>   *


r  r   c                 C   s&   t t | tjt t|tjS rN   )r2   r  r]  rG   rY  rc   r   )r   r  r8   r8   r9   r    s   r  c                 C   s    t t | tjt |tjS rN   )r2   r  r]  rG   rY  )r   r  r8   r8   r9   r    s   r  c                 C   s   t t| |t| |S rN   )r2   r  r  r  )r   r  r  r8   r8   r9   
range_mask  s   r  r  c              	      sX     ^ } rd ndrd nd f	dd}|S )Nr   r   c                    sr   | ^  t t   t	  }r+t | 
fddS t | fddS )Nc                      s   t g  S rN   )constant_boundary_condition_2dr8   )ihiwpad_fill_valueprefixr>   r8   r9   r    s    z>constant_boundary_condition_2d.<locals>.load.<locals>.<lambda>c                      s   g  S rN   r8   r8   )r
  r  r  rk  r8   r9   r    r@   )r2   r  r  r  )r   r  	r  r  r  r  	padding_h	padding_wr  r>   rk  )r
  r  r  r9   r     s   	z,constant_boundary_condition_2d.<locals>.loadrm   r   )r>   r  r  r  rw   r   r8   r  r9   r	    s   r	  c                 C   s   t | d||   || d  || d  || }|r|t | d||   || d  d|| d   || }tjj|d ||  |  ||  dkra|d8 }tjjd|||  |  ||   tjj|| dkrztjj|| d}||fS |}||fS )Nr   r   r   F)r   r3   r   r   rX  r  r   )r>   r   kernel_sizer  r  	ceil_modex_outx_altr8   r8   r9   pooling_size  s    ,0*$r  c                    s  dkrddg|dkrddg}s t  d t dt dt |d}t| ts/J t dks7J tdks?J tdksGJ t|dksOJ t|  dv sYJ |   |  ^ }}t|d |\}}	td |\}
}d sd s|	s|rt| tdn| 	 t
|||
g } d  d  }|dkstdd |D rt|  ||S  fd	d
}tj|  |  tj|dd|d}tj|  tjtj|dd|d}||fS )Nr   r   r   rR   rS   z-inf   c                 s       | ]}|d kV  qdS r5  r8   r{  r8   r8   r9   r   ?  r   z*max_pool2d_with_indices.<locals>.<genexpr>c                    s   | ^ }}}d }d }t t d t d D ]R\}}|d  | d  }|d  | d  }g |||}	|r]t| | tj}
|d u rR|
}ntt|	||
|}|d u rd|	}qt	|	|}q|ro|S |S r  )
r   productr   r2   r]  rG   rY  r1  gtr  )r`  return_indexr  bhbwr  maxindexr
  r  rG  r   r  r  r  r  rk  r8   r9   rL   E  s$   $z#max_pool2d_with_indices.<locals>.fnF)r  r   T)r0   rA   r+   rl   rm   r  r  r	  r   r   rB   r    fallback_max_pool2d_with_indicesr(   r   r   rb   r   r   rG   rY  )r>   r  r  r  r  r  r  r  h_out
ceil_mode1w_out
ceil_mode2rm  window_sizerL   r1r2r8   r   r9   max_pool2d_with_indices  sV   



r)  c                    s  dkrddg|dkrddg}st |tsJ tdks#J tdks+J tdks3J t|dks;J t| dv sEJ |   z|  }W n tyZ   d }Y nw t |trt |jjtr|jj}	t	j
d t	j|	 |	 |	 d|	d}
|
  |
 }nz| }W n ty   d }Y nw |d ur|d dkp|d uo|d dk}tjptjptj}tdd |D s|r|st| ||||S |  | ^ }}
|  ^ }| |   t| }tfd	d
td d D tfdd
td d D 		 }|dkr)t| ||||S |  	
fdd}tj|  |  ||dS )Nr   r   r   r  )r   r\   r  )r)  r  r   c                 s   r  r5  r8   r{  r8   r8   r9   r     r   z3max_pool2d_with_indices_backward.<locals>.<genexpr>c              	      8   g | ]}t |d   t d | d   d    dqS r   r   r  r=   r  r  r  r8   r9   r?         *z4max_pool2d_with_indices_backward.<locals>.<listcomp>c              	      8   g | ]}t |d   t d| d   d    d qS r   r   r,  r=   r  r.  r8   r9   r?     r/  r  c                    sV  | ^ }}}t |
 | tj}|d  }|d  }t t|d  d  d tj}t t|d  d  d tj}t t|d d tj}t t|d d tj}t |t dtj}t |t dtj}t |t tj}t |t tj}d }	tD ]}
t	D ]}t 	|t |
tj}t 	|t |tj}g |t j
t |t |t dtjd ddt j
t |t |t dtjd dd}|} |}t ||}|	d u rt ||t dtj}	qt t t ||t |||}t |t 	|	||	}	qq|	d us)J |	S )Nr   r   Fr1  r   r  )r2   r]  rG   r\  r   r  r  r  r   rF   r  r  r  r1  r  r  r  )r`  r  r  r  
index_testphstartpwstartphendpwendgradientph_pw_phpw
grad_indexindex_actual	grad_partr   r  r  h_window_sizer  indices_sizer  r  pooled_heightpooled_widthr  w_window_sizewidthr8   r9   rL     sl     


#z,max_pool2d_with_indices_backward.<locals>.fnr   )rA   r+   rl   rm   r  
get_strideAttributeErrorr   r(   r   r  rf  r   rb   decide_layoutr   coordinate_descent_tuningmax_autotunemax_autotune_pointwiser   )fallback_max_pool2d_with_indices_backwardr   rB   r  r   r   )r  r>   r  r  r  r  r  r   	gO_strider   x_bufferx_strideis_channels_lastautotuner  heightrw   rm  r&  rL   r8   rA  r9    max_pool2d_with_indices_backwardq  s   	

 ;rU  c                    s(   |   ^ }}}|    fdd}|S )Nc              
      s   |\|\ |\}}t t t   tjt |tjt t  tjt |tj}t | fdddS )Nc                      s   g    S rN   r8   r8   )h_start_indexr
  r  r  w_start_indexrk  r8   r9   r  %  r#  z3pad_adaptive_loader.<locals>.load.<locals>.<lambda>r  )r2   r  r  r]  rG   rY  r  )r  
incrementsstart_indicesend_indicesh_end_indexw_end_indexr  rk  )rV  r
  r  r  rW  r9   r     s$   z!pad_adaptive_loader.<locals>.loadr  )r>   rw   r  r  r   r8   r]  r9   pad_adaptive_loader  s   r^  c                    s(   |\|\  fdd}|S )Nc                    s   | ^ }}}|} |}|}|}d }	t td td D ]\}
}|||
|g||g||g}|	d u r>|}	q&t||	}	q&|	S r  r   r  r   r2   rF   )r`  r4  r  r  r  rV  r[  rW  r\  totalr
  r  rG  h_end_index_fnh_start_index_fnkernel_maxesw_end_index_fnw_start_index_fnr8   r9   fn_sum0  s"   $z)_adaptive_pooling_idx_sum.<locals>.fn_sumr8   )rd  start_index_fnsend_index_fnsrg  r8   ra  r9   _adaptive_pooling_idx_sum,  s   rj  c                    s  t tsJ t|dksJ    ^ }}}tjj|}tjj|}|\}}||kr9||kr9t	S |dksA|dkrTg |||}t
|  dS || dkrm|| dkrm|| || g}t|S t|| d |}	t|| d |}
t|||g } }dd }dd }tj|||d	}tj|||d	}tj|||d	}tj|||d	}|	|
 }|d
krt|S t|	|
g||g||g tt fdd}tj |||d}|S )Nr   r   re  r   c                 S      t | | |S rN   r   r   out_diminp_dimr8   r8   r9   start_indexk     z)_adaptive_avg_pool2d.<locals>.start_indexc                 S   s   t | d | | d |S r;  rl  rm  r8   r8   r9   	end_indexn  r  z'_adaptive_avg_pool2d.<locals>.end_indexrn  ro  r  c                    s   t  | t | S rN   )r2   divr^  r  rg  ones_loaderr>   r8   r9   rL     s   z _adaptive_avg_pool2d.<locals>.fnr   )rA   r+   rl   r  rm   r3   r   r   r  r  rl  rb   r   
avg_pool2dr.   rB   r   r   fallback_adaptive_avg_pool2drj  r^  	ones_liker(   r   )r>   r:  r  h_inw_inr"  r$  o_sizer  h_kernel_maxw_kernel_maxrm  r\   rp  rr  rV  r[  rW  r\  r&  rL   rvr8   ru  r9   _adaptive_avg_pool2dM  sT   

r  c                    s"      ^ }}}tjj|}tjj|}|^ }}}	|| dkr9||	 dkr9t|| ||	 gddS t||}
t||	}dd fdd}tj	||d}tj	|||d}tj	|	|d}tj	||	|d}t
|
|g||g||g  fd	d
}tj  |t|d}|S )Nr   r   )divisor_overridec                 S   rk  rN   )r   rm  r8   r8   r9   rp    rq  z0upsample_nearest2d_backward.<locals>.start_indexc                    s    | d ||S r;  r8   rm  )rp  r8   r9   rr    r  z.upsample_nearest2d_backward.<locals>.end_indexrs  c                    s    | t S rN   )r^  r  )rg  r>   r8   r9   rL     rq  z'upsample_nearest2d_backward.<locals>.fnr   )r  rm   r3   r   r   r  rw  r.   r   r   rj  r(   r   r   rb   rB   )r>   r:  
input_sizer  r  r  inp_hinp_wout_hout_wr}  r~  rr  rV  r[  rW  r\  rL   r  r8   )rg  rp  r>   r9   upsample_nearest2d_backward  s8   

r  r8   c                    s  ss
ddgt dt dt dt| ts J tdks(J tdks0J tdks8J t|  dv sBJ |   |  ^ }}}	t|d|\}
}t|	d|\}}d spd sp|sp|rxt| dd}n|  d}t	||
|g }| 
  d d  }|dkrt| |||S fd	d
|r|r|rd| n
dd d    fdd}ntt| d|rЈnd fdd}tj|   ||d}|S )Nr   r   r  r   r  TFr  c           	         s   | ^ }}}d }t t d t d D ]3\}}|d  | d  }|d  | d  }|g |||}|d u rC|}qt||}q|S r  r_  )	r`  r4  r  r  r  r`  r
  r  rG  )r  r  r  r8   r9   rg    s   $zavg_pool2d.<locals>.fn_sumr  c                    s   t | t  S rN   )r2   r   r  r  )r\   rg  r{  rk  r8   r9   rL     r  zavg_pool2d.<locals>.fnc                    s   t  |  | S rN   r2   rt  r  )rg  rv  rk  r8   r9   rL     r  r   )r0   rA   r+   rl   rm   r  r  r	  r   rB   rb   fallback_avg_pool2dry  r(   r   r   )r>   r  r  r  r  count_include_padr  r  r  r  r"  r#  r$  r%  had_paddingrm  r&  rL   r  r8   )r\   rg  r  rv  r  r{  r  rk  r9   rw    sf   






rw  c                    s  d u sdksJ dssddgt | tsJ t |ts$J tdks,J tdks4J tdks<J t| dv sFJ |   | ^ }td|\}	}
td|\}}|  d pwd pw|
pw||  ^ }	
t| }| }t	fddt
d d D t	fddt
d d D  }|d	krt| ||S fd
d  	
fdd}tj|  |||d}|S )Nr   zdivisor must be not zeror   r  r   c              	      r*  r+  r,  r-  r.  r8   r9   r?   N  r/  z'avg_pool2d_backward.<locals>.<listcomp>c              	      r0  r1  r,  r2  r.  r8   r9   r?   T  r/  r  c              	      sX  t d tj}t d tj}t d tj}t d tj}t d tj}t d tj}t t | ||}t t |||}	t t ||t t  tj|}
t t |	|t t tj|}t 	|t dtj}t 	|	t dtj}	t |
t  tj}
t |t tj}t t |
|t ||	}|S )z{
        This computes the scaling factor that we will divide an element
        by when `count_include_pad=False`
        r   r   )
r2   r  rG   r\  r  r   r  rF   r]  r  )r<  r=  stride_hstride_wpad_hpad_wkernel_hkernel_whstartwstarthendwenddivide_factor)rT  r  r  r  rG  r8   r9   !compute_pool_size_without_paddingh  s,   

z>avg_pool2d_backward.<locals>.compute_pool_size_without_paddingc                    sR  | ^ }}}|d  }|d  }t t|d  
d  
d tj}t t|d  
d  
d tj}t t|
d d tj}t t|
d d tj}t |t dtj}t |t dtj}t |t tj}t |t 	tj}d }tD ]}	tD ]}
t 	|t |	tj}t 	|t |
tj}d ur}nssd d  }n ||}t 
g |t jt |t |t dtjddt jt |t |t dtj	dd|}t t ||t ||}|d u rt ||t dtj}qt |t 	|||}qq|d us'J |S )Nr   r   Fr1  r  )r2   r]  r   rG   r\  r  r  r  r   rF   truedivr  r  r  r  r1  r  )r`  r  r  r  r5  r6  r7  r8  r9  r:  r;  r<  r=  r{  partr  )r  r  r  r  rB  r  r  r  rD  rE  r  rF  r8   r9   rL     sv     
	


*zavg_pool2d_backward.<locals>.fnr   )rA   r+   rl   rm   r  r  r   rB   rb   r  r   fallback_avg_pool2d_backwardr(   r   r   )r  r>   r  r  r  r  r  r  r  r"  r#  r$  r%  rw   rm  r\   r&  rL   r  r8   )r  r  r  r  rB  r  rT  r  r  rD  rE  r  rF  rG  r9   avg_pool2d_backward&  sf   "Ar  c                 C   s   |   }t|tr|g}n|stt|}t|dkr*t|dv s(J d| g S t|}tt|D ]5}|| dk rL||  t|rHt|nd7  < d||   krZt|k sin t|dkrg|| dksiJ q4tt|t|ksxJ d|S )Nr   )r8   r   r   zinvalid axis: r   zreduction axis not unique)rm   rA   r]   r   rl   rC   rB   rD   )r>   axisr  r   r8   r8   r9   _validate_reduction_axis  s    
 :r  c          
         s   |d ur	t | |} |  tt| |}g }g g }g ttD ]}||v r5| ||  q"| ||  q" fdd}r_t}	D ]	}t	d|	|< qTn|}	| 
  t|  |pn|  |  ||	|dS )Nc                    s   t |t ks
J r,t  t ksJ t fddD s#J  fddD  t  t ks6J d gt  t |  }tt t|D ]\}}|||< qM|S )Nc                 3   s    | ]	} | d kV  qdS r  r8   r   r   r8   r9   r     r  z8_make_reduction_inner.<locals>.loader.<locals>.<genexpr>c                    r   r8   r8   r   r   r8   r9   r?     r@   z9_make_reduction_inner.<locals>.loader.<locals>.<listcomp>)rl   r   r   chainr   )r   reduction_indexr7  r`  varinner_loaderkeepdimskept_idxreduced_idxr  r   r9   r4    s   
z%_make_reduction_inner.<locals>.loaderr   )r   	dst_dtype	src_dtyper   r   reduction_ranges)r   rm   rD   r  r   rl   rz   rB   rc   r   r   dictr   rb   )
r>   r  r  r\   r   
kept_sizesreduced_sizesr   r4  rm  r8   r  r9   _make_reduction_inner  s<   



r  ro  c                    s   dd d fdd}|S )NFr_   c                   s@   t | ||| d}tjddi|}t|jjtr|  |S )Nr  r  r\   r   ro  r8   )r  r)   r   rA   r   r   )r>   r  r  r\   r   r  r   ro  r8   r9   r     s   zmake_reduction.<locals>.innerNFr8   )ro  r   r   r8   r  r9   make_reduction  s   r  r_   c                   s   |d ur	t | |} |   t| |}|  }|tjtjfv r$t | tj} t| ||}t	 fdd|D }t
||  |  }t|t| }t t|||S )Nc                 3       | ]} | V  qd S rN   r8   r   r  r8   r9   r   0  r   zmean.<locals>.<genexpr>)r   rm   r  rb   rG   float16bfloat16r   sum_r1   r   r$   r   r#   r   rB   rt  )r>   r  keepdimr\   output_dtype
sum_resultdenomr8   r  r9   rs  %  s   

rs  c           
         s   |d u rd}|    t| |}t| |dd}|r|  tt| |}t|||}t fdd|D }|r:|| }t	|| 
 |  }t|t|  }t||}	|sX|	S |r\|nt||}|	|fS )Nr   T)r  c                 3   r  rN   r8   r   r  r8   r9   r   C  r   z var_mean_sum_.<locals>.<genexpr>)rm   r  rs  r   squarer  r  r1   r   r$   rb   r   r#   r   rB   rt  rC  )
r>   r  
correctionr  return_meanx_meandiffsr  r  x_varr8   r  r9   var_mean_sum_6  s&   

r  c                 C   sV   t | |}t| ||d d d}|d }t|d }t|tjo*t|tjk o*t|dkS )Nr  r   r  r   )	r  r  r1   rA   rc   r   r]   r   unroll_reductions_threshold)r>   r  r  r   r   reduction_numelr8   r8   r9   use_two_step_varianceP  s   


r  c                   s    d u rd t | ||d d d}|d}|d |d tjjd|fd|  d|\}}}	|  |  |  t| |}t	fdd	|D d
d  fdd}
t
|
|}|rj|  ||fS |S )Nr   r  r   r  r  welford_reduce)	inner_fnsro  r\   c                 3   r  rN   r8   r   r  r8   r9   r   v  r   z$var_mean_welford_.<locals>.<genexpr>c                 S   s6   t | tjr|  stt| tj|S t	| |S rN   )
rA   rc   r   is_constantr2   r   r]  rG   rY  r  r  r8   r8   r9   get_constant_or_index_exprx  s   z5var_mean_welford_.<locals>.get_constant_or_index_exprc                    s     }}| ||  S rN   r8   )r   r  r  )r  r\   r  rnumelr8   r9   r|  }  s   

z#var_mean_welford_.<locals>.scale_fnr8   )r  r%  r   WelfordReductionr   rb   r   rm   r  r1   r   )r>   r  r  r  r  r   r4  rs  m2rw   r|  r  r8   )r  r\   r  r  r  r9   var_mean_welford_`  s6   




r  )r  r  c                C   2   t | ||drt| |||ddS t| |||ddS )Nr  r  Fr  r  r  r  r  r  r  r>   r  r  r  r8   r8   r9   var_     

r  c                C   r  )Nr  Tr  r  r  r8   r8   r9   var_mean  r  r  c                 C   st   |dk rt t| | |S |dkrtd|S |dkr| S t | |d |}t||}|d dkr8t|| }|S )Nr   r   r   )pow_recursiver2   
reciprocalr  r   )r>   rc  r\   r  r8   r8   r9   r    s   r  c                 C      t | |S rN   )r2   powr   r   r8   r8   r9   
pow_native  r  r  )r   c                    s.  t trtkrt tS t trdkrt S t tr,dkr,t S tdd  fD }t|}t toQd  k oIdk n  pQ|oQdk}|ro   fdd	}t	j
    |  d
S t  tr dkr}tdS  dkrt rtS |rt S t S )Nr  r   c                 s   s$    | ]}t |tjr| V  qd S rN   )rA   r   r+   rb   r<   r8   r8   r9   r     rf  zpow.<locals>.<genexpr>i    r   c                    s   t |   S rN   )r  rb   r  r   r   r4  r8   r9   rL     r  zpow.<locals>.fnr   r   )rA   r   r]   r  sqrtr  r   r   r   r(   r   r   rb   rm   r   rM  r   exp2fallback_powr  )r   r   r\   is_integer_powembed_exponentrL   r8   r  r9   r    s8   
"



r  c                 C   s   t | tr	| j}n| }t |tr|j}t |tjs3tj|  |  |	 | 
 dj}t |tjs3J t |tjrN| sNt |jtjsN|  |j|_| S tj|| | S r  )rA   r+   r   r   
StorageBoxr(   r   r   rb   r   rm   is_input_buffer	NopKernelr   rV  realize_into)changedrG  changed_datar8   r8   r9   r    s2   

r  c                 C   s   t | t| |S rN   )r  rM  )r>   r  r8   r8   r9   fill_  r~  r  c                 C   s4   t ||  }t||  }t||  }t| |S rN   )r$  r   r   rb   r   rm   r  )dstr  r  r8   r8   r9   rZ    s   
rZ  c                 C   r  rN   )r2   floordivr  r8   r8   r9   r    r  r  c                 C   r  rN   )r2   truncdivr  r8   r8   r9   r    r  r  c                 C   s   t | ot |}t| ot|}|dkr(|rJ d|r!t| |S tt| |S |dkr@|r2J d|r9t| |S tt| |S t| |S )NrU  z5floordiv operands can not be boolean at the same timerW  z5truncdiv operands can not be boolean at the same time)rg   ri   r  rU  rt  r  rW  )r   r   rounding_modeboth_integerboth_booleanr8   r8   r9   div_mode  s   
r  c                 C   s8   t | ot |}|rt| |S ttjj}t|| |S rN   )ri   logical_andr&   r  r   r'  r   )r   r   	both_boolrL   r8   r8   r9   r   )  s
   
r   c                 C   s4   t | pt| }|rt| |S dd }t|| |S )Nc                  W   r0  rN   r  r   r8   r8   r9   rL   <  r2  zdiv_prim.<locals>.fn)ri   rg   r  r   r   r   is_integralrL   r8   r8   r9   div_prim5  s
   
r  c                 C   s4   t | pt| }|rdd }ndd }t|| |S )Nc                 S   r  rN   )r2   modr  r8   r8   r9   rL   O  r  zfmod.<locals>.fnc                 S   r  rN   )r2   fmodr  r8   r8   r9   rL   T  r  )ri   rg   r   r  r8   r8   r9   r  I  s
   
r  c                 C   s:   |   }t|st|rt| t } dd }t|| S )Nc                 S   s
   t | S rN   )r2   rsqrtrf   r8   r8   r9   _rsqrt`  r2  zrsqrt.<locals>._rsqrt)rb   r   r   r   rG   r  r   )r>   r\   r  r8   r8   r9   r  Z  s
   r  c                C   B   t |  st|  r|d u rtj}td|d}|| |||dS )Nra  r  r_   r   rb   r   rG   rY  r  r>   r  r  r\   rL   r8   r8   r9   r  f     

r  c                C   r  )Nrg  r  r_   r  r  r8   r8   r9   rg  q  r  rg  c                 C   s   t | tj} td| ||dS )Nr   r  r  )r   rG   rh   r  r>   rp   r  r8   r8   r9   
reduce_any|  s   r   c                 C   2   |d urt | ||dt| ||dfS t | d |dS Nr  )reduce_amaxreduce_argmaxr  r8   r8   r9   
reduce_max  
   r  c                 C   r  r  )reduce_aminreduce_argminr  r8   r8   r9   
reduce_min  r  r	  xor_sumr  r  argmaxr  argmin
logical_or)r   r   c                 C   r  )Nr  r,  r   INT_TO_FLOATr   r8   r8   r9   register_pointwise_numeric  s   r  c                 C   s   t | tjddS )NT)rj   r*  r  r  r8   r8   r9    register_pointwise_numeric_ldf64  s
   r  r-  logical_not)r   )rj   r   r   identityc                    s   t | d d fdd}|S )Nr  c                     s.    | i |}t || d  }t| d |S r   )r   rb   r  )ru   r   r  outplace_opr8   r9   rL      s   zregister_inplace.<locals>.fn)r   )aten_opr  rL   r8   r  r9   register_inplace  s   
r  c                 C   s.   t jj }|d usJ | |jjjv sJ | S rN   )rG   _guardsTracingContextr  	fake_mode	shape_envvar_to_range)r   r  r  tracing_contextr8   r8   r9   sym_constrain_rangeI  s   r  c                 C      |   | S rN   r4  r   rp   r8   r8   r9   sym_sizeQ  r  r"  c                 C   r   rN   )rH  r!  r8   r8   r9   
sym_strideV  r  r#  c                 C   s   |   S rN   )rT  r|  r8   r8   r9   	sym_numel[  r  r$  c                 O   r  )NzHelpful for debuggingr4   )r  ru   r   r8   r8   r9   foobard  r  r%  c                 C   s   |    t| S rN   )r   r  rf   r8   r8   r9   _realizei  s   r&  c                 C   s   t tj| S rN   )r+   r   r   Wait)r  r8   r8   r9   waitt  r  r(  c                 C      t j| ||||S rN   )r   	AllReducer   r  	reduce_optagranks
group_sizer8   r8   r9   	allreducex  rn  r0  c                 C   s   t tj| |||S rN   )r+   r   r   AllGatherIntoTensor)shardr-  r.  r/  r8   r8   r9   all_gather_into_tensor|  s   r3  c              	   C   r  rN   )r+   r   r   ReduceScatterTensorr+  r8   r8   r9   reduce_scatter_tensor  s   r5  c                 C   r)  rN   )r   AllReduceCoalescedr   r+  r8   r8   r9   all_reduce_coalesced  rn  r7  c                 C   s"   t j| |||}tttj|S rN   )r   AllGatherIntoTensorCoalescedr   rB   mapr+   )r  r-  r.  r/  r  r8   r8   r9    all_gather_into_tensor_coalesced  s   r:  c                 C   s$   t j| ||||}tttj|S rN   )r   ReduceScatterTensorCoalescedr   rB   r9  r+   )r  reduceOpr-  r.  r/  r  r8   r8   r9   reduce_scatter_tensor_coalesced  s   
r=  zRInductor support for distributed collectives depends on building torch.distributedr4  )quantized_loweringsrN   )NNNNF)F)r   r   rr  r   r  )r   r   r   r  )Tra   )r   NNr   )NN)r   FF)r   )NNN)Nr  )Nr   r   F)NNNN)r8   r   FTNr  (z  r   r   loggingrN  r9  collectionsr   collections.abcr   typingr   r   r   r   r   rc   rG   torch.fxtorch.utils._pytreeutils_pytreer  torch._prims_commonr	   r
   r   r   r   r   r   r   r   r   r   %torch.fx.experimental.symbolic_shapesr   r   r   torch.utils._sympy.functionsr   r   r   _dynamo.utilsr    r   r   r   r    decompositionr!   r"   r#   r$   r%   r&   r'   r(   r)   r*   r+   r,   r-   r.   r/   r0   r1   virtualizedr2   r3   	getLoggerr'  rR  ry   rO   rD   r   r  tr_c10dr(  rE   r   r:   r;   rQ   r  rw  r  bmmconvolutionconvolution_backwardr)  rU  mmr  upsample_bicubic2d_int_mmr  int8int16r\  rY  r  r  r   	complex32	complex64rh   r  r^   r]   r`   rg   ri   rx   r}   r   r   r   r   r   r   r   r   r
  r\   r   convert_element_typer  r  r>  r"  r   r$  
device_putr%  r,  r/  r1  r   aliasdetachdetach_liftview_ofr9  rk   r:  rC  rD  rH  rK  rP  rR  rU  rV  rW  r   ra  rd  rp  _unsafe_viewreshaperq  slicerz  r  rC   r  r  r  r  r  r  r  r  r  r  r  r[  r  rs  r  r1  r  	lru_cacher;  rE  rF  r>  rA  rB  rC  rL  rV  r[  rngprimsrl  rn  rt  	bernoulliro  ru  rx  r{  r}  r^  r  r  r  randintforce_stride_orderr  ri  r  r  r  lookup_seedr  randomr  r  r  r  r  r  r  r  FALLBACK_ALLOW_LIST_adaptive_avg_pool2d_backward
_cudnn_rnn_cudnn_rnn_backwardcumsumcumprod_embedding_bag_embedding_bag_forward_only_flash_attention_forward_flash_attention_backward_fused_moving_avg_obs_fq_helper*_fused_moving_avg_obs_fq_helper_functionalgrid_sampler_2d_backwardrandperm'_scaled_dot_product_efficient_attention0_scaled_dot_product_efficient_attention_backward#_scaled_dot_product_flash_attention,_scaled_dot_product_flash_attention_backwardsortstable(_sparse_coo_tensor_with_dims_and_tensors_thnn_fused_lstm_celltopkupsample_bicubic2d_backwardrI  upsample_linear1dupsample_trilinear3dupsample_linear1d_backwardupsample_trilinear3d_backward_adaptive_avg_pool3dadaptive_max_pool2dadaptive_max_pool3daddbmmaddmv_addmm_activation
avg_pool3d
block_diag_cdist_forwardcummaxcummindigamma_efficientzerotensor*_embedding_bag_per_sample_weights_backwardfractional_max_pool2dfractional_max_pool3dfrexpgeqrfhistci0igammaigammacisinkthvaluelinalg_cholesky_exlinalg_cross_linalg_detlinalg_householder_productlinalg_inv_exlinalg_ldl_factor_exlinalg_ldl_solve	linalg_lulinalg_lu_factor_exlinalg_lu_solvelinalg_matrix_exp	linalg_qr_linalg_slogdet_linalg_solve_exlinalg_solve_triangular_linalg_svdlogcumsumexp	lu_unpackmax_pool3d_with_indicesmax_unpool2dmax_unpool3dmedianr  	nanmedianormqr_pdist_forwardpixel_shufflepixel_unshuffle	polygammaputreflection_pad1dreplication_pad1dresizeresize_	resize_as
resize_as_searchsortedspecial_airy_aispecial_bessel_j0special_bessel_j1special_bessel_y0special_bessel_y1special_chebyshev_polynomial_tspecial_chebyshev_polynomial_uspecial_erfcxspecial_hermite_polynomial_hspecial_hermite_polynomial_hespecial_i0e
special_i1special_i1especial_laguerre_polynomial_lspecial_modified_bessel_i0special_modified_bessel_i1special_modified_bessel_k0special_modified_bessel_k1special_ndtri!special_scaled_modified_bessel_k0!special_scaled_modified_bessel_k1special_spherical_bessel_j0special_zetatake
_trilinearuniformunsafe_splitvdot_adaptive_avg_pool3d_backwardadaptive_max_pool2d_backwardadaptive_max_pool3d_backwardavg_pool3d_backward_cdist_backward_embedding_bag_dense_backwardfractional_max_pool2d_backwardfractional_max_pool3d_backward_linalg_check_errors max_pool3d_with_indices_backward_pdist_backwardreflection_pad1d_backwardreplication_pad1d_backwardsoft_margin_loss_backwardlinalg_pinvatol_rtol_tensorsegment_reducer(  _segment_reduce_backwardanglecholesky_inversecholesky_solve_fft_r2c	histogrambin_ct_histogramdd_bin_edges_histogramdd_from_bin_ctsindex_reducemasked_scatter	to_sparse
_to_sparsetriangular_solvegcd_linalg_eighrn   r  _prims	rng_primsrun_and_save_rng_staterun_with_rng_stateexponentialr  r  rJ  r  r  r  r  rX  scalar_tensorr  
LongTensorr  r  r  rM  r  rl  r  r3  
empty_likery  
zeros_liker  r  r  r	  r  r  r  r!  r,  r=  r   rA  rF  rH  rI  rN  rC  rG  r\  rY  rd  r]  rl  rj  rp  rh  r   r  r  r  r  r  r  r  r  r  r   r  r  r  r	  r  r!  rN  r^  rj  r  rx  r  r  r  r  r  r  rs  r  r  r  r  r  r  r  r  r  r  r  r  rZ  r  r  rt  r  r   r  true_divideTensorr  r  r  ra  r  rg  r   r   r  r  r  r	  r
  rq  r  rr  r  r  r  r  r  rF   r  r  expr  expm1relur  r  r  r  cossinr  bitwise_andbitwise_left_shiftbitwise_not
bitwise_orbitwise_right_shiftbitwise_xorlgammaerfspecial_erflog1ptantanhr  r  r  logical_xorr  r  	clamp_min	clamp_maxnegr  	remaindersignsignbitr  r  r  r  r  necoshsinhacosacoshasinasinhatan2atanatanhcopysignerfcerfinvhypotlog10	nextafter_foreach_addScalar_foreach_mul_foreach_sub_foreach_neg_foreach_powScalarAndTensor_foreach_div_foreach_sqrt_foreach_maximum_foreach_reciprocal_foreach_sign_foreach_copyr  add_bitwise_and_bitwise_left_shift_bitwise_not_bitwise_or_bitwise_right_shift_bitwise_xor_mul_div_Tensor_modelogical_and_logical_not_logical_or_logical_xor_sub_relu_sigmoid___and__
__lshift____or__
__rshift____xor____iand____ilshift____ior____irshift____ixor__r  r"  r#  r$  r   methodfuncr%  _inductor_testr   r&  )torch.distributed._functional_collectivesc10d_functionalwait_tensorr(  
all_reducer0  r3  r5  r7  r:  r=  ImportErrorry  r5  r>  register_quantized_opsr8   r8   r8   r9   <module>   sh   44
		"4
"8M

-














/
:,
	


		  Y
%
&
	
	










&.


















I
8








%-	

	]($e&"

l
T*6"*P 
C0]	 (2
*
*


























