o
    h                    @   sb"  U d dl Z d dlZd dlZd dlmZ d dl mZmZ d dlmZm	Z	 d dl
mZmZmZmZmZmZmZ d dlZd dlmZ d dlmZ d dlm  mZ d dlmZmZmZ d dl m!Z! d dlm"Z"m#Z#m$Z$m%Z% d d	l&m'Z'm(Z(m)Z)m*Z* d d
l+m,Z,m-Z- d dl.m/Z/m0Z0 ej1j2Z2g Z3ee4 e5d< ej6j7j8Z8G dd deZ9	ddedej:de;fddZ<ee<ej:j=ddZ>ee<ej:j=dZ?ee<ej:j@dZAdedeBdefddZCe!e8jDe?dedefdd ZDe!e8jEe?dedefd!d"ZEe!e8jFe?deded#eGd$eGfd%d&ZFe!e8jHe?d'ed(eGd)eGd*eGd+e;d,efd-d.ZHe!e8jIjJgd/d0 ZKe!e8jIjgd1efd2d3ZLe!e8jMe?d4edefd5d6ZMe!e8jNe?d'ed4efd7d8ZNe!e8jOd'ed4ed9eGd:eGfd;d<ZOe!e8jPe?d4edefd=d>ZPe!e8jQe?d'ed4edefd?d@ZQe!e8jRd'ed4ed$eGfdAdBZRe!e8jSe?d'ed4edCeGdDe;fdEdFZSe!e8jTe?ddHed4edIe4fdJdKZTe!e8jUe?d'edLefdMdNZUe!e8jVe?d4edefdOdPZVe!e8jWe?d'ed4edefdQdRZWe!e8jXd4edSedefdTdUZXe!e8jYd'ed4edSedeeef fdVdWZYe!e8jZe8jZj[\e2j]e?		dd4edXedYeGdZeGd[e;d\eej^ defd]d^ZZe!e8j_e8j_j[\e2j]e?		dd4edXedYeGdZeGd[e;d\eej^ defd_d`Z_e!e8j`e?d'ed4edXedYeGdZeGd[e;dDe;defdadbZ`e!e8jae?d'ed4edcedefdddeZadfedgeBfdhdiZbdjejcfdkdlZde!e8jee?e9jfjgfd4edmedgeBdefdndoZee!e8jhe?d'edLedmedgeBfdpdqZhe!e8jie?e9jfjgdrfd4edmedgeBd#eGfdsdtZie!e8jjj[e?d'ed4edmedgeBd#eGf
dudvZje!e8jjjke?d'ed4edmedgeBd#eGdwefdxdyZle!e8jmj[e?d'ed4edmedgeBdzeGf
d{d|Zme!e8jmjne?d'ed4edmedgeBdzeGdwefd}d~Zod'ed4edmedSee dgeBdeBdedefddZpe!e8jqe?d'ed4edeBdefddZqe!e8jrd'ed4edmedSee dgeBdeBdedefddZre!e8jsd'ed4edmedSee dgeBdeBdedefddZse!e8jte?de9jfjgfd4edmedSee dgeBdef
ddZte!e8jue?de9jfjgfd'ed4edmedSee dgeBdefddZue!e8jve* e?e9jfjgfdLedmedgeBdefddZve!e8jwe?e9jfjgfd'ed4edmedgeBdef
ddZwe!e8jxddLededeGfddZxe!e8jydededefddZye!e8jzd'edeeB deBdeBdeBdeBfddZze!e8j{j	 			dd4edeBdeeB deeB deBf
ddZ|e!e8j}d'edeeB deBdeBfddZ}e!e8j~d'edeeB deBdeBdeBf
ddZ~d'edwedejcfddZe!e8je>d'ededeBdejcfddZe!e8je>d'ededeBdejcfddZdd Ze!e8je* e?dLedeeB deeB deeB deeB defddZe!e8je* e?dLedeeB deeB deeB deeB deeB defddZe!e8jd'eded)eGfddZe!e8jdHedeeB deBdeBdeBdefddńZe!e8jj[e?	dd'ed4edeeG defddȄZe!e8je8jj[\e2je8jj[\e2jdLedeGdee; fdd˄Ze!e8jdLedeGdee; fdd̈́Ze!e8je* dedeBde;fddЄZe!e8je* dedeBde;fdd҄Ze!e8jjdd4eded(eGdefddԄZe!e8jjJdd4edeGd(eGdefddքZe!e8j			ddSededeBde;de;defdd݄Ze!e8jd'ededeBdeBde;f
ddZdeeB fddZe!e8je8jg	 d d4edeeB deBdee fddZe!e8jje8jjgd d4edeBdeBdeedf fddZe!e8je* e?dd4ededed#eBd(eBf
ddZe!e8je* e?			dd4ededed#eBd(eBde;fddZe!e8je* e?dd4ededed#eBd(eBf
ddZe!e8je?d'edLedededee deBdeBdeBdeBdee; deee ee ee f fddZdee dee fddZe!e8jd edLedeeB dededSee dee dee; deee ee ee f fddZdLedSee dee dee dee d[e;deGdeGde;deeeeee ee f fd	d
Ze!e8jdLedSee dee dee dee d[e;deGdeGdeeeef fddZe8jj[\e2je8jj[\e2jdLedSee dee dee dee d[e;deGdeGdeeeef fddZe8jj[\e2jd dee fddZe!e8jj[dLedSee dee dededeGdeGdeeeef fddZe!e8jj[dLedSee dee deded[e;deGdeGdeeeef fddZe!e8jjdLedSee dee d[e;deGdeGdeeeef fddZe!e8jj[dLedSee dee deded[e;deGdeGdeeeeeef fddZe!e8je?dddZdd Zdd Ze!e8jddddddddedjeejc d eej d!e;d"e;d#eej fd$d%Ze!e8je8je8jgd&d' Ze8jj[\e2je!e8jdLedSedee dee dee d[e;d(eGd)eGfd*d+Zd,d- Ze!e8jd edLedSee dee dee d.ee d/ee de;deGdee; deeee ee f fd0d1Ze!e8jdLed'edSedee dee d.ee d2ee d)eGd3efd4d5Ze!e8je?dLedeeBeBf fd6d7Ze!e8jdd8de$deBde$d9e$d(e#f
d:d;Ze!e8je* dd8de$deBde$d9e$d(e#f
d<d=Zdd8de$deBde$d9e$d>e;d(e#fd?d@Ze!e8jde$deBde$d9e$fdAdBZe!e8je* de$deBde$d9e$fdCdDZde$deBde$d9e$d>e;f
dEdFZe!e8je*ddce?d4edeeef fdGdHZe!e8j	I	rddedJee;eBeGf dKee;eBeGf fdLdMZe!e8jddNdOZdPdQ ZdRdS Ze!e8jjŃe8jjŠ\e2je8jjŠ\e2jdTdU Ze!e8jjŃe8jjŠ\e2je8jjŠ\e2jdVdW Ze!e8jjŃe8jjŠ\e2je8jjŠ\e2jdXdY ZʐdZd[ Ze!e8jj[e8jj[\e2je?	ddLedeeB d\eeG defd]d^Ze!e8jj[e8jj[\e2je?		ddLedeeB d_eeG d`eeG def
dadbZe!e8jj[e8jj[\e2je?			ddLedeeB dceeG d_eeG d`eeG defdddeZɐdfdg Z̐dhdi Z͐djdk Zΐdldm Z	ddndoZАdpdq Zѐdrds ZҐddtduZӐddvdwZԐdxdy Ze!e8jj׃e8jjנ\e2je8jjנ\e2jdzd{ Ze!e8jj׃e8jjנ\e2je8jjנ\e2jd|d} Ze!e8jjۃe8jj۠\e2je8jj۠\e2jd~d Ze!e8jjۃe8jj۠\e2je8jj۠\e2jdd Zݐdd ZސdddZߐdddZdd Ze!e8jj׃e8jjנ\e2je8jjנ\e2jdd Ze!e8jjۃe8jj۠\e2je8jj۠\e2jdd Zdd Zdd Ze!e8jjۃe8jj۠\e2je8jj۠\e2jdd Ze!e8jj׃e8jjנ\e2je8jjנ\e2jdd Ze!e8jjŃe8jjŠ\e2je8jjŠ\e2jdd Ze!e8jjŃe8jjŠ\e2je8jjŠ\e2jdd Ze!e8jj[e8jj[\e2je?		ddLedeeB de;d_eeG d`eeG defddZe!e8jj[dedede;fddZe!e8je8jgdd Ze!e8jgdd Zd4edmedSee dgeBdeBdeeef fddZe!e8jd4edmedSee dgeBdeBdeeef fddZe!e8jd4edmedSee dgeBdeBdeeef fddZdedeGdefddZdedeGdefddZdede%fddZde%dedefddZdee defddZdeBde;djejcd ejfddZdedeBdeBde;fddZdedeBdeBdeBde;f
ddÄZdedeeB de;fdĐdńZdedeeB de;fdƐdǄZe!e8j e?dedeeB de;fdȐdɄZ e!e8je?	 	 	ddededeBdeBde;defd͐d΄Ze!e8je* e?dϐdЄ Ze!e8je* e?dѐd҄ Ze!e8jdde9jfjgfdӐdԄZdejdejde;fdאd؄Ze8jj[\e2je* dِdڄ Ze!e8jj[e?		ddedeeBeBf de;deeG deeG defdݐdބZe!e8jjŃe8jjŠ\e2je8jjŠ\e2je* e?	ddedeeeBeBf  de;deeeGeGf  def
ddZ	e!e8j
e*dddddddZ
e!e8je* dddddZe!e8jj[e8jjnge* dejdddde#djeejc dejd eej d!e;f
ddZe!e8jjgdejdddde#de#djeejc dejd eej d!e;fddZe!e8je8jj[\e2je* ddde9jfjgfdLedmede#de#dSee dgeBdefddZe!e8je8jj[\e2je*dddLedmedgeBdeeef fddZdd Zee8je8j ee8je8j ee8je8j ee8je8j ee8je8jI ee8je8j ee8je8jP ee8je8j  ee8j!e8jM ee8j"e8j# ee8j$e8j% ee8j&e8j' ee8j(e8j) ee8j*e8j+ ee8j,e8j- ee8j.e8j/ ee8j0e8j1 ee8j2e8j3 ee8j4e8j5 ee8j6e8j7 ee8j8e8j9 ee8j:e8j; ee8j<e8j= ee8j>e8j? ee8j@e8jV dS (	      N)Enum)partialreduce)chainproduct)CallablecastIterableListOptionalTupleUnion)	sym_floatsym_intTensorregister_decomposition)IntLike
NumberType
TensorLikeTensorSequenceType)_maybe_convert_to_dtype_maybe_resize_out_safe_copy_outout_wrapper)expect_true	guard_int)tree_flattentree_map__all__c                   @   s   e Zd ZdZdZdZdS )	Reductionr         N)__name__
__module____qualname__NONEMEANSUM r)   r)   R/var/www/html/ai/venv/lib/python3.10/site-packages/torch/_decomp/decompositions.pyr    "   s    r    Fftype_promotioncompute_dtype_onlyc                    s   t  fdd}|S )Nc                     sr   dd t | |fd D }tj|di\  fdd}fdd}t|| i t||}r4|S t||S )	Nc                 S   s   g | ]	}t |tr|qS r)   )
isinstancer   .0xr)   r)   r*   
<listcomp>2   s
    
z-type_casts.<locals>.inner.<locals>.<listcomp>r   type_promotion_kindc                       t | tr
|  S | S Nr.   r   tor1   computation_dtyper)   r*   increase_prec:      

z0type_casts.<locals>.inner.<locals>.increase_precc                    r4   r5   r6   r8   )result_dtyper)   r*   decrease_prec@   r<   z0type_casts.<locals>.inner.<locals>.decrease_prec)r   utilselementwise_dtypesr   )argskwargs	flat_argsr;   r>   rr-   r+   r,   )r:   r=   r*   inner0   s   

ztype_casts.<locals>.inner)	functoolswraps)r+   r,   r-   rF   r)   rE   r*   
type_casts+   s   rI   T)r,   r-   )r,   r1   dimreturnc                 C   s$   t ||   D ]}| d} q| S )N)rangerJ   	unsqueeze)r1   rJ   _r)   r)   r*   _unsqueeze_to_dim]   s   rP   out_gradyc                 C   s   | d||     S Nr!   conj_physicalrQ   rR   r)   r)   r*   tanh_backwardc      rW   c                 C   s   | |d|     S rS   rT   rV   r)   r)   r*   sigmoid_backwardi   rX   rY   beta	thresholdc                 C   s.   ||   }t|| |k| | | |d  S N      ?)exptorchwhere)rQ   r1   rZ   r[   zr)   r)   r*   softplus_backwardo   s   "rb   grad_outputalphascaleinput_scale	is_resultself_or_resultc           	      C   sb   || }|}|}|rt |dk| | ||  | | S t |dk| | | t ||  | | S Nr   )r_   r`   r^   )	rc   rd   re   rf   rg   rh   negcoefposcoef
negiptcoefr)   r)   r*   elu_backwardv   s   
rm   c                 C      t | |S r5   )r_   	full_likeselfvaluer)   r)   r*   fill_scalar      rs   rr   c                    s(   t   dk fdd t|  S )Nr   c                      s   d    dS )Nz@fill only supports 0-dimension value tensor but got tensor with z dimensionsrJ   r)   rr   r)   r*   <lambda>       zfill_tensor.<locals>.<lambda>)r_   _checkrJ   atencopyrp   r)   rv   r*   fill_tensor   s
   

r|   rq   c                 C   s    t jt j| d ddddd S N   r   min   maxr_   clamprq   r)   r)   r*   hardsigmoid   s    r   c                 C   s   t |dk|dk @ | d dS )Ng      g      @gUUUUUU?        r_   r`   rc   rq   r)   r)   r*   hardsigmoid_backward   s
   r   min_valmax_valc                 C   s   t ||k||kB d| S Nr   r   )rc   rq   r   r   r)   r)   r*   hardtanh_backward   s   r   c                 C   s$   | t jt j| d dddd d S r}   r   r   r)   r)   r*   	hardswish   s   $r   c              
   C   s,   t |dk dt |dk| |d d  | S )Nr   r~         ?r   r   r)   r)   r*   hardswish_backward   s
   r   c                 C   s   t ||kd| S r   r   )rc   rq   r[   r)   r)   r*   threshold_backward      r   negative_slopeself_is_resultc                 C   s   t |dk| | | S ri   r   )rc   rq   r   r   r)   r)   r*   leaky_relu_backward   s   r   nonegradapproximatec                 C   s   d}d}d}|dkrO|| d }d}|| }|| }	||||	   }
t |
}d| }d| }d| }d||  }|dd| |   }|| | }| ||  S |}|| d }ddt ||   }|t || d	  }| |||   S )
Ng;f?g;f?gmBP?tanhr   gHm?r!   r~   g      )r_   r   erfr^   )r   rq   r   M_SQRT2	M_SQRT1_2
M_2_SQRTPIkBetakKappax_sqx_cuberF   
tanh_innerleftrightleft_derivativetanh_derivativeinner_derivativeright_derivativekAlphacdfpdfr)   r)   r*   gelu_backward   s,   
r   inputc                 C   s:   t t|}t |}|| d||   }| ||  S rS   )r_   r   Fsoftplussigmoid)rc   r   input_tanh_softplusinput_sigmoidoutr)   r)   r*   mish_backward   s   
r   c                 C   s   | t |  S r5   )r_   r   r   r)   r)   r*   silu   s   r   c                 C   s,   ddt |   }| | d|d|    S rS   )r_   r^   )rc   rq   r   r)   r)   r*   silu_backward  s   r   weightc                 C   s   t | dk| ||  S ri   r   )rq   r   r)   r)   r*   _prelu_kernel	  s   r   c                 C   s4   t |dk| ||  }t |dkd||  }||fS )Nr   r   r   )rc   rq   r   
input_gradweight_gradr)   r)   r*   _prelu_kernel_backward  s   r   noiseloweruppertraining	generatorc           
      C   sh   |d u sJ |r(| dk}t | ||}t|| | | }|t||d |S || d }	t | |	S Nr   r!   r"   )rz   uniformr_   r`   copy_
leaky_relu)
rq   r   r   r   r   r   not_positiverD   outputr   r)   r)   r*   rrelu_with_noise  s   r   c              	   C   s   |  t| |||||S r5   )r   r   )rq   r   r   r   r   r   r)   r)   r*   rrelu_with_noise_0  s   r   c                 C   s6   |r|| dkr|  |S || d }t| |||S )Ngư>r"   )mulrz   r   )rc   rq   r   r   r   r   r   r   r)   r)   r*   rrelu_with_noise_backward>  s   
r   bufferc                 C   sN   |dk }t |dd}t |dd}t t | }| |||d|     S )Nr   r!   rL   )r_   r`   r^   abs)rc   rq   r   in_negative	max_derivsignra   r)   r)   r*   log_sigmoid_backwardR  s
   r   loss	reductionc                 C   s0   |t jjkrt| S |t jjkrt| S | S r5   )r    r'   rr   r_   meanr(   sum)r   r   r)   r)   r*   apply_loss_reduction^  s
   

r   dtypec                 C   s4   | t jkrt jS | t jkrt jS | t jkrt jS d S r5   )r_   	complex32float16	complex64float32
complex128float64r   r)   r)   r*   to_real_dtypeg  s   


r   targetc                 C   s   | | d }t ||S )Nr"   )r   )rq   r   r   r   r)   r)   r*   mse_lossv  s   
r   c                 C   s,   |t jjkrd|  nd}|||  |  S )N       @)r    r'   rr   numel)rc   r   r   r   normr)   r)   r*   mse_loss_backward  s   r   r]   c                 C   s<   | |   }t||k d|d  | |d|  }t||S )Nr   r"   )r   r_   r`   r   )rq   r   r   rZ   r   r)   r)   r*   smooth_l1_loss  s   &
r   c           	      C   sZ   |t jjkrd|  nd}|| }t|}||  }t||k || | |t| S r\   )r    r'   rr   r   r_   r   r`   r   )	rc   rq   r   r   rZ   r   r1   abs_x	norm_gradr)   r)   r*   smooth_l1_loss_backward  s   

r   
grad_inputc                 C   *   t | ||||}t||j t||ddS NT)	copy_fromcopy_toexact_dtype)r   r   shaper   )rc   rq   r   r   rZ   r   resultr)   r)   r*   smooth_l1_loss_backward_out     
r   deltac              
   C   s`   |t jjkrd|  nd}|| }t|| k | |  | t||k||  | || |  S r\   )r    r'   rr   r   r_   r`   )rc   rq   r   r   r   r   r1   r)   r)   r*   huber_loss_backward  s    r   c                 C   r   r   )r   r   r   r   )rc   rq   r   r   r   r   r   r)   r)   r*   huber_loss_backward_out  r   r   ignore_indextotal_weightc                 C   s   |  dk rdnd}|tjjkr| | } ||}t||k|d}t|}	t|	||d}	|	  |     kr=dkrDn n| |} |d urcdd t	|  D }
|j
d |
|< ||
}| | } t||k| d} |	|  S )Nr"   r   r!   g      c                 S   s   g | ]}d qS r!   r)   r0   rO   r)   r)   r*   r2     rx   z&_nll_loss_backward.<locals>.<listcomp>)rJ   r    r'   rr   rN   r_   r`   
zeros_likescatterrM   r   reshape)rc   rq   r   r   r   r   r   channel_dimsafe_targetr   	new_shaper)   r)   r*   _nll_loss_backward  s    	

 

r  c           
      C   s   |  dks
J dt|  |}||}|d dks'J d| d| |d }||d|}||||}t|}d| | | |  }	||  }tj||	g|dS )Nr   z*glu does not support 0-dimensional tensorsr"   z.Halving dimension must be even, but dimension z	 is size r]   ru   )rJ   r?   canonicalize_dimsizenarrowr_   r   cat)
rc   rq   rJ   wrap_dimnIn	inputSize	firstHalf
secondHalfgradInputFirstHalfgradInputSecondHalfr)   r)   r*   glu_backward  s   

r  c                 C   sr  d|    krdksJ d J d|  dksJ d|  dko)|  dk}|sC|jd |jd ksCJ d|j d|j d| dksXJ d	|j d
|  df|d u si| |jd ksiJ d|tjjkr|  dkr|   dkr| jd |jd ksJ d|jd  d|    d| jd  n|   dkr|  dksJ d| j t| ||||||S )Nr   r"   input tensor should be 1D or 2Dr!   ;0D or 1D target tensor expected, multi-target not supportedsize mismatch (got input: 
, target: ):expected total_weight to be a single element tensor, got: z (z
 elements)rL   z<weight tensor should be defined either for all or no classesz7Expected a tensor of dimension 1 and tensor.size[0] == z but got: dimension z and tensor.size[0] == z7Expected a single element grad_output tensor, but got: )rJ   r   r   r    r&   rr   r  )rc   rq   r   r   r   r   r   no_batch_dimr)   r)   r*   nll_loss_backward  s<   (
"
r  c                 C   s   |  dksJ d|   |  dksJ d|   |jd |jd kr<|jd |jd kr<|jd |jd ksHJ d|j d	|j | dks\J d
|j d|  dt| ||||||S )N   zSonly batches of spatial inputs supported (4D tensors), but got input of dimension: r~   zUonly batches of spatial targets supported (3D tensors) but got targets of dimension: r   r"   r!   r  r  r  z ( z, elements))rJ   r   r   r  )rc   rq   r   r   r   r   r   r)   r)   r*   nll_loss2d_backward1  s*   r  c              	   C   s\   |d t t |  | dd |t t | | dd  }|d ur)|| }t||S )Nr!   r)   i)r_   maximumlog1pnew_fulllogr   )rq   r   r   r   r   r)   r)   r*   binary_cross_entropyS  s   

r  c                 C   sR   d}| ||  t j|d|  |d }|d ur|| }|tjjkr'||  }|S )Ng-q=r!   r   )r_   r   r    r'   rr   r   )rc   rq   r   r   r   EPSILONr   r)   r)   r*   binary_cross_entropy_backwardh  s   	"r   c                 C   s    t t |  | }t||S r5   )r_   r  r^   r   )r   r   r   r   r)   r)   r*   soft_margin_lossz  s   
r!  c                 C   s6   ||  t || d  }|tjjkr||  }|S rS   )r_   r   r    r'   rr   r   )rc   rq   r   r   r   r)   r)   r*   soft_margin_loss_backward  s   r"  r"   otherpc                 C   s   t j| | |dS )N)r$  )rz   r   )r   r#  r$  r)   r)   r*   dist  r   r%  x1x2c           	      C   s   |  ddd}tj|tjd}| ddd}tj|tjd}t| d||gd}t|||gd}||j}|	d
 S )Nr"   rL   Tmemory_formatr   )powr   r_   	ones_likecontiguous_formatr  r   matmulmT	clamp_minsqrt)	r&  r'  x1_normx1_padx2_normx2_padx1_x2_r   r)   r)   r*   _euclidean_dist  s   r8  input_sizesstartendstepc                 C   s   |  |}t|| ||||S r5   )	new_zerosr_   slice_scatter)rc   r9  rJ   r:  r;  r<  r   r)   r)   r*   slice_backward  s   
	r?  r!   c                 C   s:  |   }|dkrtdt|   |}t|  }t|  }|dkr(td|d ur.|nd}|d ur6|ntj}	|dk rC||| 7 }|	dk rM|	|| 7 }	|dk rTd}n
||| kr^|| }|	|k re|}	n
|	|| kro|| }	| 	 |||   }
|	| }|| d | ||< ||  |9  < | j
rtd| |||
S )Nr   z,slice() cannot be applied to a 0-dim tensor.zslice step must be positiver!   z<Slice decomposition for quantized tensors aren't implemented)rJ   RuntimeErrorr?   r  listr  stridesysmaxsizestorage_offsetis_quantizedNotImplementedError
as_strided)rq   rJ   r:  r;  r<  ndimsizesstrides	start_valend_valrE  lenr)   r)   r*   slice_forward  s>   	rO  indexc                 C   s   |  |}t|| ||S r5   )r=  r_   select_scatter)rc   r9  rJ   rP  r   r)   r)   r*   select_backward  s   
rR  offsetdim1dim2c                 C   s   |  |}t|| |||S r5   )r=  r_   diagonal_scatter)rc   r9  rS  rT  rU  r   r)   r)   r*   diagonal_backward  s   
rW  input_dtypec                 C   s   | j |kr
||}|S r5   )r   r7   )rc   r   rX  r)   r)   r*   _cast_grad_to_input_dtype  s   

rY  r   c                 C   s0   | | }||t j||dd  }t| || S NTrJ   keepdim)r_   r   rY  
contiguous)rc   r   rJ   rX  new_grad_outputr   r)   r)   r*   _softmax_backward_data  s
   
r_  c                 C   s*   | t |t j| |dd  }t| ||S rZ  )r_   r^   r   rY  )rc   r   rJ   rX  r   r)   r)   r*   _log_softmax_backward_data  s   
r`  c           
      C   sZ   | |d  ||d   }t tjtj|d}|d||d}|d|| |d}	||	 S )z/Utility function to implement im2col and col2imr"   r!   r   devicer   rL   )r   r_   arangeint64rN   )
input_dkernel_d
dilation_d	padding_dstride_drb  blocks_d	arange_kwblocks_d_indiceskernel_gridr)   r)   r*    _im2col_col2im_indices_along_dim  s
   rn  kernel_sizedilationpaddingrB  c              	      s&  t tdkdd  t t dkdd  t tdkdd  t tdkdd  ddd	}|d
 | d | ddd |d | jt}t |dv odtdd dd  D fdd tdd tdd   D t tdd D  fdd |dk}|s| d} | j\}}	}
}\}}\}} \}}\}}t|
||||| j	}t|||||| j	}t
| ||||f}|dd}|d d d d ||f }|dddddd}|d}|d}|||	| | || }|s|d}|S ) Nr"   c                   S      dS )Nz"im2col(): only 2D kernel supportedr)   r)   r)   r)   r*   rw   4      zim2col.<locals>.<lambda>c                   S   rr  )Nz$im2col(): only 2D dilation supportedr)   r)   r)   r)   r*   rw   5  rs  c                   S   rr  )Nz#im2col(): only 2D padding supportedr)   r)   r)   r)   r*   rw   6  rs  c                   S   rr  )Nz"im2col(): only 2D stride supportedr)   r)   r)   r)   r*   rw   7  rs  Tc                 S   <   |rt dd | D nt dd | D }t|dd  d S )Nc                 s       | ]}|d kV  qdS r   Nr)   r0   r$  r)   r)   r*   	<genexpr>:      z1im2col.<locals>.check_positive.<locals>.<genexpr>c                 s       | ]}|d kV  qdS rv  r)   rw  r)   r)   r*   rx  :  ry  c                   S   rr  )Nz<{param_name} should be greater {'than' zero, but got {param}r)   r)   r)   r)   r*   rw   <  rs  z0im2col.<locals>.check_positive.<locals>.<lambda>allr_   ry   param
param_namestrictcondr)   r)   r*   check_positive9     (zim2col.<locals>.check_positivero  rp  rq  Fr  rB  r~   r  c                 s       | ]}|d kV  qdS rv  r)   r0   dr)   r)   r*   rx  G  ry  zim2col.<locals>.<genexpr>r   c                         dt   S )NzmExpected 3D or 4D (batch mode) tensor for input with possible 0 batch size and non-zero dimensions, but got: tupler)   r   r)   r*   rw   H      c                 s   s>    | ]\}}}}}d |d|  ||d    d  |  V  qdS )r!   r"   Nr)   r0   r   paddilkerstr)   r)   r*   rx  K  s
    "
r*  c                 s   ru  rv  r)   )r0   cr)   r)   r*   rx  R  ry  c                      s6   dt dd   d d  d d d dS )	Nz!Given an input with spacial size r*  , kernel_size=, dilation=
, padding=	, stride=z9, the calculated shape of the array of sliding blocks is z*, but its components must be at least one.r  r)   rp  ro  output_sizerq  r   rB  r)   r*   rw   S  s    r  r   rL   r!   r~      T)r_   ry   rN  r   r|  r  ziprN   rn  rb  r   r  permuter  r   squeeze)r   ro  rp  rq  rB  r  rI  batched_input	batch_dimr   input_hinput_wstride_hstride_w	padding_h	padding_w
dilation_h
dilation_wkernel_hkernel_wblocks_row_indicesblocks_col_indicespadded_inputr   num_blocks_rownum_blocks_colr)   r  r*   im2col*  sd   




 




r  r  c              
      s  t tdkdd  t tdkdd  t tdkdd  t tdkdd  t tdkdd  d$d	d
}|d |d |ddd |d |d | jt}t |dv outdd dd  D fdd d d  }t d | dkfdd dd tD }	|	d |	d   t d  k fdd t  dk fdd |dk}
|
s| d} | j\}}\}}\}}\}}\}}| d d | g |	 } | dddd dd!} t	|||||| j
}t|d }t	|||||| j
}d"d tD }| d d t g| }d d ||f}tj||| dd#}t|| | | | f}|
sf|d}|S )%Nr"   c                   S   rr  )Nzonly 2D output_size supportedr)   r)   r)   r)   r*   rw     rs  zcol2im.<locals>.<lambda>c                   S   rr  )Nzonly 2D kernel supportedr)   r)   r)   r)   r*   rw     rs  c                   S   rr  )Nzonly 2D dilation supportedr)   r)   r)   r)   r*   rw     rs  c                   S   rr  )Nzonly 2D padding supportedr)   r)   r)   r)   r*   rw     rs  c                   S   rr  )Nzonly 2D stride supportedr)   r)   r)   r)   r*   rw     rs  Tc                 S   rt  )Nc                 s   ru  rv  r)   rw  r)   r)   r*   rx    ry  z1col2im.<locals>.check_positive.<locals>.<genexpr>c                 s   rz  rv  r)   rw  r)   r)   r*   rx    ry  c                   S   rr  )Nz9{param_name} should be greater than zero, but got {param}r)   r)   r)   r)   r*   rw     rs  z0col2im.<locals>.check_positive.<locals>.<lambda>r{  r}  r)   r)   r*   r    r  zcol2im.<locals>.check_positivero  rp  rq  Fr  rB  r  )r"   r~   c                 s   r  rv  r)   r  r)   r)   r*   rx    ry  zcol2im.<locals>.<genexpr>r*  c                      r  )NzmExpected 2D or 3D (batch mode) tensor for input with possible 0 batch size and non-zero dimensions, but got: r  r)   r  r)   r*   rw     r  r   r!   c                      s   dd  d  S )Nz|Expected size of input's first non-batch dimension to be divisible by the product of kernel_size, but got input.shape[-2] = r*  z and kernel_size=r)   r)   )ro  r   r)   r*   rw     s
    c                 S   s:   g | ]\}}}}}d |d|  ||d    d  |  qS r!   r"   r)   r  r)   r)   r*   r2     s    "zcol2im.<locals>.<listcomp>rL   c                      4   d d d d d d  dd  d	S 
NzGiven output_size=r  r  r  r  z , expected input.size(-1) to be 	 but got rL   .r)   r)   Lrp  ro  r  rq  r   rB  r)   r*   rw         c                      r  r  r)   r)   r  r)   r*   rw     r  r~   r  r  c                 S   s   g | ]
\}}|d |  qS r"   r)   )r0   or$  r)   r)   r*   r2     s    
accumulater  )r_   ry   rN  r   r|  r  rN   r   r  rn  rb  rP   r=  prodrz   _unsafe_index_putr   r  r  )r   r  ro  rp  rq  rB  r  rI  prod_kernel_sizecolr  out_hout_wr  r  r  r  r  r  r  r  indices_rowindices_coloutput_padded_sizer   idxr)   r  r*   col2im}  s   




 



"

r  maskc                 C   s$   | | | |  jt| d}|S )Nr(  )type_ascloner?   suggest_memory_format)rc   r  re   rD   r)   r)   r*   native_dropout_backward  s   r  
input_size	dimensionr  c           	      C   s   t |dkrt| dS tt ||}tj|| | jtjd}|d||	 }| 
d|d 	||d } | |}d| |f }tj||| dd S )Nr   rb  r   rL   r!   r5   Tr  )rN  r_   squeeze_copyr?   r  rc  rb  int32unfoldflattenmovedimr=  rz   r  r]  )	r   r  r  r  r<  rJ   r  r   rP  r)   r)   r*   unfold_backward  s   
r  epsc              	   C   st   |d ur|}d| }t t ||k||k| |d|   dS t t |dk|dk| |d|   |dtdS )Nr]   r   r)   nan)r_   r`   logical_andr  float)rc   rq   r  lohir)   r)   r*   logit_backward   s   r  trainc                 C   s&   |r|dkrt | ||d S |  S ri   )rz   native_dropoutr  )r   r$  r  r)   r)   r*   dropout  s   r  c                 C   sp   |r.|dkr.|dkrt | t j| t jdfS t | |k}||  tdd|   }||fS | t j| t jdfS )Nr   r!   r   r]   )r_   r   bool	rand_liker  r,  )r   r$  r  	bool_maskresr)   r)   r*   r    s   r  half_to_floatc                 C   s   |   } |r| jtjksJ tj| tjjd\}}| |} | 	 dkr*t
| }ntj| |dd}t
| | }|tj||dd }|sJ||}|S Nr3   r   T)r\  )r]  r   r_   halfr?   r@   ELEMENTWISE_TYPE_PROMOTION_KINDDEFAULTr7   r   r^   amaxr   )r1   rJ   r  r:   r=   unnormalizedx_maxr   r)   r)   r*   _softmax+  s   


r  c           	      C   s   |   } |r| jtjksJ tj| tjjd\}}| |} | 	 dkr'| }ntj
| |dd}| | }ttjt||dd}|| }|sL||}|S r  )r]  r   r_   r  r?   r@   r  r  r7   r   r  r  r   r^   )	r1   rJ   r  r:   r=   shiftedr  shifted_logsumexpr   r)   r)   r*   _log_softmaxB  s    


r  c                 C      t j|| |dS Nrd   r_   subrq   r#  rd   r)   r)   r*   rsub_TensorZ     r  c                 C   r  r  r  r  r)   r)   r*   rsub_Scalar_  r  r  rL   indicespadding_idxscale_grad_by_freqsparsec                 C   sJ   |   dks
J d|jdkr!| d|}|jdkr|d}|S | | S )Nr"   z'weight' must be 2-Dr!   r   )rJ   rI  index_selectr  )r   r  r  r  r  r   r)   r)   r*   	embeddingd  s   


r  num_weightsc                 C   s   t j| t jjd\}}| |} t|tj}|r8||f}t	|}t
j||g|dd}|| }	| |	d } t||k| j}
| |
d}| |f| j|jd   }t
j||g|dd|S )Nr  Tr  rL   r   )r?   r@   r  r  r7   r   r_   longr=  r,  rz   r  rN   rP   rI  masked_fillr   )rc   r  r   r  r  r:   r=   countsonesgrad_weights_scaler  r   grad_weightr)   r)   r*   embedding_dense_backwardx  s&   


r  c                 C   s   d}| D ]}||9 }q|S rS   r)   )r1   rD   ir)   r)   r*   r    s   
r  split_sizesc                 C   s   t tt|| j| kdd  t|}g }d}t|D ])}|| }t |dkdd  t|| | j| k |	| 
||| ||7 }q|S )Nc                   S   rr  )NzDSplit sizes don't add up to the tensor's size in the given dimensionr)   r)   r)   r)   r*   rw     rs  z"split_with_sizes.<locals>.<lambda>r   c                   S   rr  )NzCsplit_with_sizes expects split_sizes have only non-negative entriesr)   r)   r)   r)   r*   rw     rs  )r_   _check_with
ValueErrorr   r   rN  rM   ry   r   appendr  )rq   r	  rJ   
num_splitssplits	start_idxr  lengthr)   r)   r*   split_with_sizes  s$   
r  
split_size.c                    sx   | j }|| } dkr|dksJ | fS |  d   }t|} fddt|D }  | |  |d< t| ||S )Nr   r!   c                       g | ]} qS r)   r)   r0   r  r  r)   r*   r2     rx   zsplit.<locals>.<listcomp>rL   )r   r   rM   r_   split)rq   r  rJ   r9  dim_sizechunksr	  r)   r  r*   r    s   r  mat1mat2c                 C   H   |   s|  st|}t|}|t|| }|dkr|S |||   S ri   )is_floating_point
is_complexintr_   mm)rq   r  r  rZ   rd   r   r)   r)   r*   addmm  s   r   use_geluc                 C   s<   t | ||||}|r| jrtj|ddS t|S t|S )Nr   )r   )r   is_cudarz   gelurelu)rq   r  r  rZ   rd   r!  r   r)   r)   r*   _addmm_activation  s   

r%  vecc                 C   r  ri   )r  r  r  r_   mv)rq   r  r&  rZ   rd   r   r)   r)   r*   addmv  s   r(  r   rstdgammaNCHxWgroupoutput_maskc
              	      s  t j| ||dd t j|| dd t j|dd t|    k fdd tjfkfdd td u pJ  k fdd t \}
}t|dk fdd t| |	 j
d	gd
}| 	 j
d	gd
}d }d }d }|	d r:d|
  }d urt|d|

d	}t|d|

d	}t|dd|
}n&||

d	}||

d	}t|dtjd|
f|jd}| | | | | | }|  || |  }|d}t|d}t|d}t| |
|t||
| | }||j|j}|	d r_|	|
|	|
d  |d j
dgd
 }|	d	 rk|j
dgd
}|||fS )NF)allow_cpu_scalar_tensorsc                      s   d    dS )NzExpect input to have z	 elementsr)   r)   )r,  r-  r+  r)   r*   rw         z,native_group_norm_backward.<locals>.<lambda>c                      s   d  d dj  S )NzExpect mean to have shape (, z
, but got r  r)   )r+  r.  r   r)   r*   rw         c                      s$   d  dd ur   S d S )NzExpect gamma to have z elements but got rL   r   r)   )r,  r*  r)   r*   rw     s   $ r   c                      s   d  d S )NzExpect number of channels z, to be evenly-divisible by number of groups r)   r)   )r,  r.  r)   r*   rw     rx   r"   ru   r]   rL   r!   rb  r  )r?   check_same_devicecheck_same_shaper_   ry   r   r   divmodr   viewr   rN   r   r  rb  rP   r7   r   )rc   r   r   r)  r*  r+  r,  r-  r.  r/  cpg_remdsdbd_inputd_gammad_biassds_valdb_valc1c2c3r)   )r,  r-  r+  r*  r.  r   r*   native_group_norm_backward  s   
 
""



$

rG  c                 C   s   | d ur	|  |S | S r5   r7   )r1   r   r)   r)   r*   _maybe_castS  s   
rI  grad_outnormalized_shapebiasc           !         s0  |j }| }	t|j  fdd| |||fD \}
}}}|
d us$J |	t| }||d  }|d | }g }g }t|	D ]}||krJ|| q>|| q>t|}t|}|dks`|dkr|d ri|	|nd |d rw|	||d  nd |d r|	||d  fS d fS || | }|d ur|
| }n|
}|| }t
||d}t
||}t
||d}t
||}|| | }d }d }d } |d r|| | }|d r|d urt|dkrt
|
| |d}n|
| }|d r|d urt|dkrt
|
|d} n|
 } t||jt||jt| |jfS )Nc                 3   s*    | ]}|d ur|   n|V  qd S r5   )r7   r]  r/   r9   r)   r*   rx  h  
    
z-native_layer_norm_backward.<locals>.<genexpr>r   r!   r"   TF)r   rJ   r?   get_computation_dtyper   rN  rM   r  r  r=  r_   r   r   r  rI  )!rJ  r   rK  r   r)  r   rL  r/  input_shape
input_ndimgrad_out_cast
input_castweight_cast	bias_castaxis
inner_dims
outer_dimsinner_dim_indicesouter_dim_indicesr  r+  Mx_hat
grad_x_hatabrD  rE  rF  rF   r>  d_weightr@  r)   r9   r*   native_layer_norm_backwardZ  sh   





r`  running_meanrunning_varmomentum
functionalc	                 C   sT  dgt td|   }	t| j}
|}|}|rt| j}
| j|
d}tj||	ddd\}}t	|| }| | | }t
||	}t
||	}|d ur]|| d| |  }|s]|| |d ur|  | jd  }t
||	}|||d   }|| d| |  }|s|| nT|d ur|d usJ |j|
dd}|}|j|
dd}|}|}dt||  }| jjdkr|}|}n
| d	}| d	}t||  d }t||  d }| | | }|d ur| }t||  d }|| }|d ur	| }t||  d }|| }| jjdkr|j| jd}|j| jd}|j| jd||||fS )
Nr   r"   r   T)rJ   
correctionr\  r!   )r   r{   cpur   )rA  rM   rJ   r?   rN  r   r7   r_   var_meanrsqrtr  r   r   r   r1  rb  typer=  rP   r  )r   r   rL  ra  rb  r   rc  r  rd  reduction_dimsr:   new_running_meannew_running_var	input_acc
biased_varr   r)  r   	save_mean	save_rstdnsqueezed_varunbiased_varinvstdr)   r)   r*   native_batch_norm_helper  st   





rv  c              
   C   ,   t | |||||||d	\}}	}
}}||	|
fS NFrv  r   r   rL  ra  rb  r   rc  r  r   rp  rq  rO   r)   r)   r*   native_batch_norm     
r{  c              
   C   sv   |d u r|d u rt | |||||S |d u rtd|d u r"td|r0t | |||||||S t | ||||||S )Nz`running_mean is None, but running_var is provided. They should both be None or both be provided.z`running_var is None, but running_mean is provided. They should both be None or both be provided.)rz   _native_batch_norm_legitr@  $_native_batch_norm_legit_no_training)r   r   rL  ra  rb  r   rc  r  r)   r)   r*   native_batch_norm_decomposition  s&   r  c                    s|   |  |}|| d |   dkr4|dkr4 fdd|D }  | |  ||d < tjjj| ||S tjjj|  |S )Nr!   r   c                    r  r)   r)   r   r  r)   r*   r2   ?  rx   z(unsafe_chunk_py_impl.<locals>.<listcomp>)r  r_   opsrz   unsafe_split_with_sizesdefaultunsafe_splitr   )tensorr  rJ   r  r	  r)   r  r*   unsafe_chunk_py_impl9  s   
r  c              
   C   s   t j| ||||d||S rx  )rz   r}  r  )r   r   rL  ra  rb  rc  r  r)   r)   r*   r~  E  s   
r~  c              
   C   rw  rx  ry  rz  r)   r)   r*   r}  [  r|  r}  c           
   
   C   s,   t | ||d d |||d	\}}}}	}	|||fS rx  ry  )
r   r   rL  r   rc  r  r   rp  rq  rO   r)   r)   r*   !_native_batch_norm_legit_no_statsl  s   	
r  c              
   C   sP   t | |||||||d	\}}	}
}}|d usJ d|d us!J d||	|
||fS )NTz#new_running_mean should not be Nonez"new_running_var should not be Nonery  )r   r   rL  ra  rb  r   rc  r  r   rp  rq  rl  rm  r)   r)   r*   #_native_batch_norm_legit_functional{  s   r  c                 C   sB   |d u sJ t | |k jt jd}|| |  d|  }||fS )Nr   r]   )r_   r  r7   uint8r  )r   r$  r   r  r  r)   r)   r*   _fused_dropout_decomposition  s   r  c                 C   s   t | tjjr
| jS d S r5   )r.   r_   _subclasses
FakeTensorfake_device)r  r)   r)   r*   device_hint  s   r  c                 C   sD   |d ur | j jdkr ddlm} | }d|_|j}||| |S | S )Nmetar   )FakeTensorModeT)rb  rj  torch._subclasses.fake_tensorr  in_kernel_invocationfake_tensor_converterfrom_meta_and_device)r1   common_devicer  	fake_mode	converterr)   r)   r*   wrap_output_with_input_device_  s   r  )r   layoutrb  
pin_memorynon_blockingr)  rb  r  r  r)  c          	      C   s   |r|t jksJ d|rJ d|d u r!|d u r!|d u r!|  S d}t| }|d urI|| jkrI|d urB|jdkrBt j| |} d}t j| |} |d urX|sXt j| |} d}|r_t	| |} |d urjt j| |dS | S )NTODOFrf  Tr(  )
r_   stridedr  r  rb  rj  _primsconvert_element_type
device_putr  )	r1   r   r  rb  r  r  r)  dtype_convertedr  r)   r)   r*   _to_copy  s&   
r  c                 C   s
   t | S r5   )rz   aliasr8   r)   r)   r*   nop_decomposition  s   
r  exponential_average_factorepsilonc              
   C   s^   t | |||||||\}}	}
|r||	|
| jdtjdfS ||d|d| jdtjdfS )Nrg  r   )rz   r{  r=  r_   r  )r   r   rL  ra  rb  r   r  r  r]  r^  r  r)   r)   r*   cudnn_batch_norm  s"   
r  c                 C   sD   t |D ]\}}|dkr|| jk r| j| || ks| |} q| S rS   )	enumeraterI  r   rN   )r1   broadcast_maskrU  r  r)   r)   r*   _broadcast_batch_norm_backward  s
   $
r  rp  save_invstdc
           &         s  |j }
|d ur|j }n|
}t|j   fdd| ||||||fD \}}}}}}}|j}| }|dks9J dd}tt|||  }|}|}|rV|d urS|d usUJ n|d ur^|d us`J |}t|| }dg| }|| ||< g }t	|D ]}||kr|
| qzt||}d| }t||}t|||  |}t|| |}tt|| || |} |d u rt||d }!nt|| |}!|r|| |  }"||" | |! }#n||! }#|	d r|| }$nd }$|	d r|}%nd }%|#|
t|$|t|%|fS )Nc                 3   s&    | ]}|d ur|  n|V  qd S r5   rH  r/   r9   r)   r*   rx  !  s
    
z-native_batch_norm_backward.<locals>.<genexpr>r"   z$rank of the input must be at least 2r!   r]   )r   r?   rN  r   rJ   r  rA  r_   ri  rM   r  r  r   r   r7   rI  )&rJ  r   r   ra  rb  rp  r  r  r  r/  rX  weight_dtyperQ  rR  rS  running_mean_castrunning_var_castsave_mean_castsave_invstd_castrO  
input_rankrU  num_featuresr   ru  r  reduction_axesr  r   grad_output_sumdot_p	grad_mean
proj_scale
grad_scaleprojr   r  	grad_biasr)   r9   r*   native_batch_norm_backward  s   
	



r  save_varreserveSpacec	           	      C   s    t || |||||d|g d
S )NT)TTT)rz   r  )	r   rc   r   ra  rb  rp  r  r  r  r)   r)   r*   cudnn_batch_norm_backwardj  s   r  c                    s"  | j  | jttdv fdd | jdd  D ]}t|dkfdd q| jtjtjtjtj	tj
fv rCtjj| |S d |d  dkrd |d  dkrtdd	 tdd  |D }td
d	 tdd  ||D }tjj| ||S dd dd  fdd}|d |d \}}}}	|d |d \}
}}}| dt|d|
f }|	s|stj|ddS dd }|||||	dd\}}|||||dd\}}d }tt|jd t|jd D ]\}}|d u r|d|d d |f }q||d|d d |f  }q|||  S )Nr  c                      
   d  S )Nz9adaptive_avg_pool2d(): Expected 3D or 4D tensor, but got r)   r)   rI  r)   r*   rw        
 z%adaptive_avg_pool2d.<locals>.<lambda>r*  r   c                      s   dt   dS )Nzjadaptive_avg_pool2d(): Expected input to have non-zero size for non-batch dimensions, but input has shape r  r  r)   r  r)   r*   rw     s    rL   c                 s   s    | ]	\}}|| V  qd S r5   r)   )r0   r  r  r)   r)   r*   rx        z&adaptive_avg_pool2d.<locals>.<genexpr>c                 s   s&    | ]\}}}||d  |  V  qdS )r!   Nr)   )r0   r  r  rA  r)   r)   r*   rx    s    
c                 S   s   t j| | |ddS )Ntruncrounding_moder_   divr]  r^  r  r)   r)   r*   start_index  s   z(adaptive_avg_pool2d.<locals>.start_indexc                 S   s    t j| d | | d |ddS )Nr!   r  r  r  r  r)   r)   r*   	end_index      z&adaptive_avg_pool2d.<locals>.end_indexc                    s   t j| t jd}||| }| | d }| | }|dkp"|| dk }|r+|d7 }n|dkr3|d8 }t j| t jd}|d| }|rbt j| d |j|jd}	t ||	}||| }
|
| }n|}||||fS )Nr  r!   r   rL   ra  )r_   rc  rd  rN   scalar_tensorr   rb  minimum)in_sizeout_sizeorangei0	maxlengthin_size_modadaptive	range_maxr  maxvali1r  )rb  r  r  r)   r*   compute_idx  s(   

z(adaptive_avg_pool2d.<locals>.compute_idx.r  )r   rL   ru   c                 S   s`   t |tr	| |fS |dk sJ ||dk}|dkrt|d}t| |d} t|| }| |fS )Nr   rL   r*  r  r   )r.   r   rN   rP   r_   r  )valsr  r  r  rJ   r  r)   r)   r*   
maybe_mask  s   

z'adaptive_avg_pool2d.<locals>.maybe_mask)r  rJ   r   )rb  r   rN  r_   ry   r   int8r  int16r  rd  nnrd  adaptive_avg_pool2dr  r  
avg_pool2drP   r   r   rM   )r   r  r  rB  kernelr  idxhlength_hrange_max_h
adaptive_hidxwlength_wrange_max_w
adaptive_wr  r  retr  jr)   )rb  r  rI  r   r  r*   r    sR   

(  



&r  r  r  c                C      t | |||d|dS )NTinplacerd   
_index_addr1   rJ   rP  r  rd   r)   r)   r*   
index_add_  s   	r  c                C   r  )NFr  r  r  r)   r)   r*   	index_add  s   
r  r  c                   s   t | j|}tjdkfdd  dkr7t | jttkp+t t	  fdd |  }| jdk}|rC| 
dn| }d| f }|rQtjntj}	|	|||dd}
|r`| S |rg|
dS |
 S )	Nr!   c                         d j  dS Nz(Index should have dimension 1 or 0 (got r  r  r)   rP  r)   r*   rw         z_index_add.<locals>.<lambda>c                      s   dt   d dS )Nzalpha argument of type z cannot be safely cast to type !)rj  r)   )rd   python_typer)   r*   rw     s    r   r5   Tr  )r?   canonicalize_dimsrI  r_   ry   dtype_to_typer   r  is_weakly_lesser_typerj  rN   rz   
index_put_	index_putr  r]  )r1   rJ   rP  r  r  rd   zero_dimr&  r  r  r   r)   )rd   rP  r  r*   r  	  s*   	

r  c                 C      t | |||ddS )NTr  _index_copyr1   rJ   rP  r  r)   r)   r*   index_copy_+  r   r  c                 C   r  )NFr  r   r  r)   r)   r*   
index_copy0  s   r  c          
         s   t | j|}t jdk fdd | jdk}|r | dn| }d|  f }|r.tjntj}||||}	|r;| S |rB|		dS |	
 S )Nr!   c                      r  r  r  r)   r  r)   r*   rw   <  r  z_index_copy.<locals>.<lambda>r   r5   )r?   r  rI  r_   ry   rN   rz   r  r  r  r]  )
r1   rJ   rP  r  r  r  r&  r  r  r   r)   r  r*   r  6  s   

r  c                 C   sL   t | d| }t t |  }| jr| d}n|}|t | |fS )Nr)   rg  )r_   r  r=  r^   r   r"  r  )rq   r   ra   r   r)   r)   r*   log_sigmoid_forwardK  s   r  r   lowhighc                 C   s"   t j| jt|t|| j| jdS )N)r  r  r   rb  )prims_uniform_helperr   r   r   rb  )r1   r  r  r)   r)   r*   r   X  s   r   c                 C   s   |d u sJ |  t| ||S r5   )r   r   )rq   r  r  r   r)   r)   r*   uniform_g  s   r
  c                 C   s   t | d }|d ur"t|d u dd  tt ||kdd  |S |d urjt|d u dd  tt ||kdd  g }t|D ]%\}}t||krZ|| |d  t|  qB|t| |d  |  qB|S tddd  d S )	Nr"   c                   S   rr  Nz9Must specify exactly one of output_size and scale_factorsr)   r)   r)   r)   r*   rw   s  rs  z.upsample_compute_output_size.<locals>.<lambda>c                   S   rr  N r)   r)   r)   r)   r*   rw   u  rs  c                   S   rr  r  r)   r)   r)   r)   r*   rw   {  rs  c                   S   rr  r  r)   r)   r)   r)   r*   rw   }  rs  Fc                   S   rr  r  r)   r)   r)   r)   r*   rw     rs  )rN  r_   ry   r  r  r  r   )r  r  scale_factorsspatial_dimensionsr  rA  r)   r)   r*   upsample_compute_output_sizen  s.   r  c                 C   s   | d u rd S | | S r5   r)   )scalesr  r)   r)   r*   get_scale_value  s   r  c                 C   s&   t |  ||}t|d}t| ||S ri   )r  r  r  upsample_nearest1d)r   r  r  osizere   r)   r)   r*   upsample_nearest1d_vec  s   
r  c                 C   s2   t |  ||}t|d}t|d}t| |||S Nr   r!   )r  r  r  upsample_nearest2d)r   r  r  r  scale_hscale_wr)   r)   r*   upsample_nearest2d_vec  s   

r  c                 C   s>   t |  ||}t|d}t|d}t|d}t| ||||S r   )r  r  r  upsample_nearest3d)r   r  r  r  scale_dr  r  r)   r)   r*   upsample_nearest3d_vec  s
   


r  c                 C   s   g }t |}| jtjkrtjn| j}t|D ]F}|| }tj||| jd}| j| |  }	|| d ur;|	|	||   n|	| }
||
 	tj
}t|d | D ]}|d}qO|| qt|S )Nra  r!   rL   )rN  r   r_   r  r  rM   rc  rb  r   r7   rd  rN   r  r  )r   r  r  r  num_spatial_dimsrX  r  r  output_indicesisizere   input_indicesrO   r)   r)   r*   !_compute_upsample_nearest_indices  s   $r"  r  c                 C   s"   t | ||f\}t| d d |fS r5   r"  rz   _unsafe_index)r   r  r  	l_indicesr)   r)   r*   r    s   r  scales_hscales_wc           
      C   sj   t | |||f\}}t| d d ||f}t| }| j\}}	}}| jjdkr-|	dk r-tj	}|j
|d}|S )Ncudar  r(  )r"  rz   r$  r?   r  r   rb  rj  r_   r-  r]  )
r   r  r&  r'  	h_indices	w_indicesr   r)  rO   
n_channelsr)   r)   r*   r    s   	

r  scales_dc           	      C   s2   t | ||||f\}}}t| d d |||f}|S r5   r#  )	r   r  r,  r&  r'  	d_indicesr)  r*  r   r)   r)   r*   r    s
   

r  c                    sb   |r|rd n|rd n|rd nd t   dks!J t  fddtdt  D S )Nr  r  r~   r"   r   c                    s    g | ]}t ||   qS r)   r  r  
group_sizeparamsr)   r*   r2   	  s    z!gather_params.<locals>.<listcomp>)rN  rM   )r0  
has_biaseshas_projectionsr)   r.  r*   gather_params	  s   r3  c                 C   sh   |r!| d|  |d|  }}| d| d  |d| d  }}n| | || }}d\}}||||fS )Nr"   r!   NNr)   )r0  hiddensr  bidirectional
cur_params
cur_hiddenbidir_paramsbidir_hiddenr)   r)   r*   params_hiddens	  s   $r;  c                 C   s2   ||ksJ | | d|||  | dd|S ri   )r  r  )r8  last_batch_size
batch_sizer5  r)   r)   r*   update_hidden_for_packed	  s   r>  c              	   C   s4   ||kr| S ||k sJ t | |d||| fS ri   )r_   concatr  )r8  r<  r=  
inp_hiddenr)   r)   r*    update_hidden_for_packed_reverse"	  s   rA  c                 C   s$  |d }|d }|r|d nd }	|r|d nd }
g }g }|r"|d n|d }| dd|}t| t|}|r>|d d d }|D ]-} | jd }||krLn|rVt||||}nt||||}|| |||	||
}|}|| q@|ru|  n	|| |  t	|d}|st	|dn|}||fS )Nr   r!   r"   r~   rL   )
r  r_   r  rA  r   rA  r>  r  reverser  )inphiddenr0  r1  	hidden_fnbatch_sizesrB  	ih_weight	hh_weightih_biashh_biasstep_outputr5  r<  r8  	split_inpr  r   
hidden_outr)   r)   r*   one_layer_rnn_data0	  s@   


rN  c                        fdd}|S )Nc                    s    t ||||  S r5   r   linearr  r8  rG  rI  rH  rJ  nonlinearityr)   r*   rF   _	  s   zrnn_cell.<locals>.innerr)   rT  rF   r)   rS  r*   rnn_cell^	  s   rV  c                    rO  )Nc                    s$   t | ||}  t ||||  S r5   rP  rR  rS  r)   r*   rF   f	  s   zrnn_cell_data.<locals>.innerr)   rU  r)   rS  r*   rnn_cell_datae	  s   rW  c                 C   s   |d }|d }|r|d nd }|r|d nd }	t | ||}
|r&|
dn|
}
|d}g }|
D ]}|||||||	}|| q1|rH|  t|d}||dfS )Nr   r!   r"   r~   )	r   rQ  fliprN   r  rB  r_   r  r  )rC  rD  r0  r1  rE  rB  rG  rH  rI  rJ  precomputed_inputr8  rK  r  r   r)   r)   r*   one_layer_rnnm	  s   
rZ  c                 C   s   |d }|d }|r|d }|d }nt | }t | }|d d}	|d d}
g }d}|	d}d}d}d}d}|  } |	 }	|
 }
t jjj| |||||	|
|||||||||}|d |d |d }}}||	d|	dffS )Nr   r!   r"   r~   F)
r_   zerosr  rN   r]  r  rz   mkldnn_rnn_layerr  r  )rC  rD  r0  r1  rB  w0w1w2w3hxcxrF  modehidden_size
num_layersr6  batch_firstr  outputsrR   hycyr)   r)   r*   mkldnn_one_layer_lstm	  sN   


rj  c
                 C   s   |r|  ddn| } g }
t|D ]^}t||||\}}}}|r'||d k r'|nd}|	| |||\}}|
| |rI|	| |||dd\}}|
| |rXt||g| d } n|} |dkrn|rn||d k rntj| |dd} q|rw|  ddn| } | |
fS )Nr   r!   r   T)rB  )r  )	transposerM   r;  r  r_   r  rJ   r  )r   rD  r0  r1  re  r  r  r6  rf  layer_fnfinal_hiddensr  r7  r8  r9  r:  fwd_inp
fwd_hiddenbwd_inp
bwd_hiddenr)   r)   r*   _rnn_helper	  s,   



rr  c	                 C   R   | d}	t||d}t| |	|||||||ttttjd
\}
}|
t|dfS Nr   FrE  )	unbindr3  rr  r   rZ  rV  r_   r   stackr   ra  r0  r1  re  r  r  r6  rf  rD  r   rm  r)   r)   r*   rnn_tanh_input	     
ry  c	                 C   rs  rt  )	rv  r3  rr  r   rZ  rV  r_   r$  rw  rx  r)   r)   r*   rnn_relu_input	  rz  r{  c	                 C   T   | d}	t||d}t| |	||||||dtt|ttjd
\}
}|
t|dfS Nr   FrF  rE  )	rv  r3  rr  r   rN  rW  r_   r$  rw  datarF  ra  r0  r1  re  r  r  r6  rD  r   rm  r)   r)   r*   rnn_relu_data
  &   
r  c	                 C   r|  r}  )	rv  r3  rr  r   rN  rW  r_   r   rw  r  r)   r)   r*   rnn_tanh_data?
  r  r  c                 C   s   t ||||  }|d|}|d  }	|d  }
|d  }|d  }|
| |	|  }||  }|d u r;|nt ||d }||fS )Nr  r   r!   r"   r~   r   rQ  chunkr   r   )rC  ra  rb  rH  rJ  	hr_weight	chunk_dimgateschunked_gatesin_gateforget_gate	cell_gateout_gateri  rh  r)   r)   r*   	lstm_cellb
  s   r  c              
   C   s   |d }|d }|r|d nd }|r|d nd }t |dkr"|d nt |dkr,|d nd }	|d d}
|d d}t| ||}|rJ|dn|}g }|D ]} t| |
||||	dd\}
}||
 qP|rk|  t	|d}||

d|
dffS )Nr   r!   r"   r~   r  r  r  )rN  rN   r   rQ  rX  r  r  rB  r_   r  r  )rC  rD  r0  r1  rB  rG  rH  rI  rJ  r  ra  rb  rY  rK  r   r)   r)   r*   one_layer_lstmp
  s$   *r  c              
   C   s
  |d }|d }|r|d nd }|r|d nd }	t |dkr"|d nt |dkr,|d nd }
g }g }|r8|d n|d }t| t|}|rM|d d d }|d }|d }|dd||dd|}}|D ]l} | jd }t| ||} ||k r||d||| |d||| f |dd||dd|}}||krt	||d||| fd}t	||d||| fd}t
| ||||	|
dd\}}|}|| qf|r|  ||f}n|||f |  t| \}}t|dt|df}t|d}||fS )	Nr   r!   r"   r~   r  r  rL   r  )rN  r_   r  rA  r  r   r   rQ  r  r?  r  rB  r  r  )rC  rD  r0  r1  rF  rB  rG  rH  rI  rJ  r  rK  r5  r<  rL  orig_hxorig_cxra  rb  r  rM  hidden0hidden1r   r)   r)   r*   one_layer_lstm_data
  s\   *

r  c                 C   s   dd }|| ||rt S tS )a   Check whether we could use decompose lstm with mkldnn_rnn_layer.
    All the below conditions need to be met:
        * ``torch._C._has_mkldnn`` returns ``True``.
        * All the input args are on CPU.
        * The dtypes of args are either torch.float or torch.bfloat16.
        * Inference.
        * ``has_projections`` returns ``False``.

    Args:
        * input: the input sequence to LSTM
        * hx: a tuple of the input hidden state and cell state ``(h_0, c_0)`` to LSTM
        * params: the weight and bias tensors of LSTM
    c           	      S   s   t jjsdS | gt| tt| }dd |D }t|dkr#dS | }|t dkr0dS dd |D }|D ]}|t j	t j
fvrF dS q9| jrLdS |d d|d dk}|r^dS d	S )
NFc                 S      h | ]}|j qS r)   r5  r0   tr)   r)   r*   	<setcomp>
      zEselect_one_layer_lstm_function.<locals>.use_mkldnn.<locals>.<setcomp>r!   rf  c                 S   r  r)   r   r  r)   r)   r*   r  
  r  r   r"   T)r_   _C_has_mkldnnrA  r   from_iterablerN  poprb  r  bfloat16requires_gradr  )	r   ra  r0  tensorsdevicesrb  dtypesr   r2  r)   r)   r*   
use_mkldnn
  s(   z2select_one_layer_lstm_function.<locals>.use_mkldnn)rj  r  )r   ra  r0  r  r)   r)   r*   select_one_layer_lstm_function
  s   r  c	                 C   s   t |dks
J dt|||d d|d dk}tt|d |d }	t| ||}
t| |	||||||||

\}}tt| }|t|d dt|d dfS )Nr"   lstm expects two hidden statesr   r!   )	rN  r3  r  rA  r  r  rr  r_   rw  )r   ra  r0  r1  re  r  r  r6  rf  rD  rl  r   rm  r)   r)   r*   	lstm_impl
  s$   $"r  c	                 C   s   t |dks
J dt|||d d|d dk}tt|d |d }	t| |	||||||dtt|d
\}
}tt| }|
t	|d dt	|d dfS )Nr"   r  r   r!   F)rF  )
rN  r3  r  rA  r  rr  r   r  r_   rw  r  r)   r)   r*   lstm_data_impl  s"   $
"r  c                 C   sr   |  dd}t||| dd}|d |d   }|d |d   }	|d |d |   }
||
 |	 |
 S )Nr~   r!   r"   r   )r  r   rQ  r   r   rC  r8  rG  rI  rH  rJ  chunked_igateschunked_hgates
reset_gate
input_gatenew_gater)   r)   r*   gru_cell?  s   r  c                 C   s|   t | ||dd}t |||dd}|d |d   }|d |d   }	|d |d |   }
||
 |	 |
 S )Nr~   r!   r   r"   r  r  r)   r)   r*   gru_cell_dataH  s   r  c	                 C   sJ   t ||d}t| |d||||||dtt|td
\}	}
|	t|
dfS )NFr   r~  )r3  rr  rv  r   rN  r  r_   rw  )r  rF  ra  r0  r1  re  r  r  r6  r   rm  r)   r)   r*   gru_impl_dataQ  s   r  c	                 C   sH   t ||d}t| |d|||||||tttd
\}	}
|	t|
dfS )NFr   ru  )r3  rr  rv  r   rZ  r  r_   rw  )r   ra  r0  r1  re  r  r  r6  rf  r   rm  r)   r)   r*   gru_implo  s   
r  c                 C   s:   t |  ||}t|d}t|d}tjj| ||||S r  )r  r  r  r_   r  rz   _upsample_bilinear2d_aar   r  align_cornersr  r  r  r  r)   r)   r*   upsample_bilinear2d_aa_vec  s   


r  c                 C   s4   t |  ||}t|d}t|d}t| ||||S r  )r  r  r  upsample_bilinear2dr  r)   r)   r*   upsample_bilinear2d_vec  s   

r  r  c           $      C   sf  | j \}}}}|d }	|d }
|	dkr+|r|d |	d  }n|d ur&d| n||	 }nd}|
dkrI|r<|d |
d  }n|d urDd| n||
 }nd}tj|	| j| jd}tj|
| j| jd}|rj|| }|| }n||d  d jdd}||d  d jdd}|tj}t|j|d dtj}|tj}t|j|d dtj}|	d}|	d}|	d}t
| d d ||g}t
| d d ||g}t
| d d ||g}t
| d d ||g}|| }d| }|| }d| }t||t|| } t||t|| }!t| |t|!| }"t| }#| jjd	kr+|d
k r+tj}#|"j|#d}"|"S )Nr   r!   r]   r   ra  r   r   r   r(     r(  )r   r_   rc  r   rb  r   r7   rd  ceilrN   rz   r$  r   r?   r  rj  r-  r]  )$r   r  r  r&  r'  n_batchr+  in_hin_wr  r  h_scale_factorw_scale_factorr  r  r1   rR   x_floorx_ceily_floory_ceilx_viewx_floor_viewx_ceil_viewv1v2v3v4xscale2xscale1yscale2yscale1q1q2r   r)  r)   r)   r*   r    sV   




r  r]  r^  c                 C   s   | j |j kS r5   r  )r]  r^  r)   r)   r*   is_same_size  rt   r  c                 G   rn   r5   )rz   r9  )r1   r   rA   r)   r)   r*   _reshape_alias  rt   r  c                 C   rn   r5   )rz   rP  )r1   r  r)   r)   r*   _index  rt   r  c                 C   sV  |   }d}|dk rd}|d ur,|dkr&dg| }|jd ||< ||}n|}| | } t||k|d}	|	|}
t| ||
| }t||k|d}|tj	j
krb|dkrb| dd}||fS |d ur|| j}t|||
|}t||k|d}| }n	||k | }|tjj
kr| }||fS |tjj
kr| | }||fS )Nr!   r"   r   r)   r   )rJ   r   r9  r_   r`   rN   gatherr  r    r&   rr   r  expandr   r7   r(   r'   )rq   r   r   r   r   n_dimsr   r   wr  safe_target_r   r   wsumr)   r)   r*   _nll_loss_forward  sB   


r  c                 C   s   |   dkr|   dksJ d|  dksJ d|   dko%|  dk}|s?| jd |jd ks?J d| j d|j d| jd	 }|d u s_|  dkrT| |ks_J d
| d|j t| ||||S )Nr   r"   r  r!   r  r  r  r  rL   z/weight tensor should be defined either for all z7 classes or no classes but got weight tensor of shape: )rJ   r   r   r  )rq   r   r   r   r   r  	n_classesr)   r)   r*   nll_loss_forward8  s    
r  c                 C   s   t | ||||S r5   )r  )rq   r   r   r   r   r)   r)   r*   nll_loss2d_forwardS  s   r  Ac                 C   s    |d |  |d  |  |  d S )Nr"   r~   r!   r)   r1   r  r)   r)   r*   _upsample_cubic_convolution1`  r  r  c                 C   s(   ||  d|  |  d|  |  d|  S )Nr     r  r)   r  r)   r)   r*   _upsample_cubic_convolution2d  s   (r  r  c                 C   s4   d}t | d |t| |td|  |t d|  |fS )Ng      r]   r   )r  r  )r  r  r)   r)   r*    _upsample_get_cubic_coefficientsh  s   r  coeffstsc                 C   s    t |}tdd t| |D S )Nc                 s   s    | ]	\}}|| V  qd S r5   r)   )r0   rD  rE  r)   r)   r*   rx  t  r  z+_upsample_cubic_interp1d.<locals>.<genexpr>)r  _sum_tensorsr  )r  r  coeffs2r)   r)   r*   _upsample_cubic_interp1dr  s   r  c                 C   s   t tj| S r5   )r   r_   add)r  r)   r)   r*   r  x  s   r  	num_stepsc                 C   sB   | dkrt jd||dS |s| d |  nd}t j| || ||dS )Nr!   r   r  )stepsrb  r   )r_   r  linspace)r  r  r   rb  r]  r)   r)   r*   _linspace_from_neg_one|  s   r  thetahr  c           	      C   s   | j }| j}t||||d|d}t|||||dd}tjd||d}tjjj|dddd}tjjj|dddd}tjjj|d	ddd}|| | S )
Nr!   )r!   r!   r!   ra  )r   r"   constantr   r  rc  rr   r!   r!   )r"   r   	r   rb  r  r9  r_   r  r  rd  r  )	r  r  r  r  r   rb  grid_xgrid_ygrid_oner)   r)   r*   _make_base_grid_4d  s   r  r  c                 C   s   | j }| j}t||||dd|d}t||||d|dd}t|||||ddd}	tjd||d}
tjjj|dddd}tjjj|dddd}tjjj|	d	ddd}	tjjj|
d
ddd}
|| |	 |
 S )Nr!   )r!   r!   r!   r!   ra  )r   r~   r  r   r  r  )r"   r!   )r~   r   r  )r  r  r  r  r  r   rb  r  r  grid_zr  r)   r)   r*   _make_base_grid_5d  s   r  c           	      C   sL   |\}}}}t | |||d}|ddd| jd d}||||dS )Nr  rL   r~   r!   r*  r"   )r  r9  r/  rN   r   )	r  r  r  rr  rO   r  r  	base_gridgridr)   r)   r*   _affine_grid_generator_4d  s    r  c           
      C   sR   |\}}}}}t | ||||d}|ddd| jd d}	|	||||dS )Nr  rL   r  r!   r*  r~   )r  r9  r/  rN   r   )
r  r  r  rr  rO   r  r  r  r  r  r)   r)   r*   _affine_grid_generator_5d  s    r  c                 C   s@   t t|dv dd  t|dkrt| ||dS t| ||dS )N)r  r  c                   S   rr  )NzCaffine_grid_generator needs 4d (spatial) or 5d (volumetric) inputs.r)   r)   r)   r)   r*   rw     rs  z'affine_grid_generator.<locals>.<lambda>r  r  )r_   ry   rN  r  r  )r  r  r  r)   r)   r*   affine_grid_generator  s   
r  r  interpolation_modepadding_modec                    s  t dv fdd t dv fdd dtdtdtffdd	dtd
tdtdtfdddtdtdtffdddtdtdtffdd}j\}
|j\}}dtdtdtf
fddt jjddddt j|jdd|dd dtdtdtdtffdddtdtdtf fdd|d }|d  }	d!kr||}
||	
}|
	 |	 d }}d }}||}}||
 ||  }|
| ||  }||
 ||  }|
 |  }t
fd"d#|f|||f|||f|||ffD S dkr4||}
||	
}|
 }| }||dS |}
|	
}|
	 |	 |
 | }dtdtdtf
fd$d%	d&tdtf	fd'd(tfd)d#td*D }t||dS )+N)r   r!   r"   c                      r  )NzInvalid interpolation mode r)   r)   )r   r)   r*   rw     r  z!grid_sampler_2d.<locals>.<lambda>c                      r  )NzInvalid padding mode r)   r)   )r  r)   r*   rw     r  coordsr  rK   c                    s0    r|d d n|d }|d d }| | | S Nr   r)   )r  r  r   ofsr  r)   r*   unnormalize  s   z$grid_sampler_2d.<locals>.unnormalize	twice_low
twice_highc                 S   sv   ||kr	t | S |d }|| d }| |  }t ||}||  jt jd}t |d@ dk|| || | S )Nr"   r   r!   r   )r_   r   r   fmodfloorr7   r  r`   )r  r  r  
coords_mincoords_spancoords2extraflipsr)   r)   r*   reflect_coordinates  s   
z,grid_sampler_2d.<locals>.reflect_coordinatesc                    sf   dkr| S dkrt | d|d S  r | dd|d  }n
| dd| d }t |d|d S )Nr   r!   r"   rL   r   )r  r  coords_reflected)r  r  r  r)   r*   compute_coordinates  s   z,grid_sampler_2d.<locals>.compute_coordinatesc                    s   | |} ||S r5   r)   )r  r  	coords_un)r  r  r)   r*   compute_source_index  s   

z-grid_sampler_2d.<locals>.compute_source_indexxsysc                    s,   t d| kt | k t d|k| k S ri   )r_   r  )r  r  )iHiWr)   r*   in_bounds_cond  s   $z'grid_sampler_2d.<locals>.in_bounds_condr5  r!   wsc                    s@   | | t  fdd| jtjd|jtjd|fD S )Nc                 3   s*    | ]}t |d  dV  qdS r   r!   N)r_   r`   r9  r  )r+  r  oHoWr)   r*   rx    rM  z0grid_sampler_2d.<locals>.clip.<locals>.<genexpr>r   )r  r7   r_   rd  )r  r  r  )r+  r  r  r  )r  r*   clip  s   
zgrid_sampler_2d.<locals>.clipixiyc                    s&   | ||\}}} ||f | S r5   r)   )r  r  r  idx_xidx_yw_)C_idxN_idxr]  r  r)   r*   get_summand  s   z$grid_sampler_2d.<locals>.get_summand).r   ).r!   r   c                 3   s"    | ]\}}} |||V  qd S r5   r)   )r0   r  r  r  )r%  r)   r*   rx  -  
    

z"grid_sampler_2d.<locals>.<genexpr>c                    s     | } |}||dS rS   r)   )r  r  r1   rR   )r  r%  r  r  r)   r*   get_value_boundedH  s   

z*grid_sampler_2d.<locals>.get_value_boundedr  c                    sL   | d  } d | | d | d |f}t |dS )Nr!   r"   )r  rN   )r  iy_ofscs)r'  ix_nwiy_nwtxr)   r*   	get_coeffM  s   z"grid_sampler_2d.<locals>.get_coeffc                 3       | ]} |V  qd S r5   r)   )r0   r  )r-  r)   r*   rx  W  ry  r  )r_   ry   r   r  r   rc  rb  r9  r   r	  r  roundr  rM   r  rN   )r]  r  r   r  r  r  r,  rO   r1   rR   r  r  ix_neiy_neix_swiy_swix_seiy_sew_nww_new_sww_se
ix_nearest
iy_nearesttyr  r)   )r#  r+  r$  r]  r  r  r  r-  r%  r'  r  r  r  r   r*  r+  r  r  r  r  r,  r  r*   grid_sampler_2d  sl   	
$ 




	



 
r=  c                    s`   t   dko dk fdd t  ddk fdd   jddS )Nr"   r!   c                      s   d    d   S )Nzmatrix @ vector expected, got r2  ru   r)   rq   r&  r)   r*   rw   a  r3  zmv.<locals>.<lambda>r   c                      s*   d  d d  d d d dS )Nzsize mismatch, got input (r   r1   r!   z), vec (r  )r  r)   r>  r)   r*   rw   e  s   * ru   )r_   ry   rJ   r  r   r>  r)   r>  r*   r'  [  s   r'  c                    s     r, r   rt    S t  S   r,t  S t dko9  dk fdd tj jk fdd  fdd}t	  	 k|   
 S )Nr!   c                      s   d   d    dS )Nz1D tensors expected, but got zD and z	D tensorsru   r)   r#  rq   r)   r*   rw   y  s    zdot.<locals>.<lambda>c                      s   dj  d j  S )Nz:dot : expected both vectors to have same dtype, but found  and r   r)   r?  r)   r*   rw   }  r1  c                	      s.   d   d    d   d    d	S )Nz+inconsistent tensor size, expected tensor [z] and src [z.] to have thesame number of elements, but got r@  z elements respectivelyr4  r)   r?  r)   r*   numel_error  s   zdot.<locals>.numel_error)r  is_conjr_   dotconjvdotry   rJ   r   r   r   )rq   r#  rA  r)   r?  r*   rC  j  s$   
rC  c                 C   s   |   d}|d ur+|d | d }d| |  ||  |  |    |   }nd| |  | |  |  |     }|d urI|| }t||S r  )r0  r^   r  r   )rq   r   r   
pos_weightr   r   
log_weightr   r)   r)   r*    binary_cross_entropy_with_logits  s   

rH  tensor1tensor2c                 C   s   | j |j kr
| |fn|| f\}}|j dkr|j dksdS |jr!dS | j dkr(dS | dkr0dS |j}| }tdd t|d d |d	d
 |d	d
 D S )Nr~   r"   FTr   c                 s   s"    | ]\}}}||| kV  qd S r5   r)   )r0   st1st2s2r)   r)   r*   rx    r&  zshould_fold.<locals>.<genexpr>r*  r!   rL   )rI  r  r   r   rB  r|  r  )rI  rJ  t1t2t1_shape	t1_strider)   r)   r*   should_fold  s    
"rR  c                 C   sn  |   }|  }|dkr|dksJ |dkr |dkr t| |S |dkr.|dkr.t| |S |dkrD|dkrDttt| d|dS |dkrR|dkrRt| |S t| |r||k}|r`|jn| }|sf|n	|dkrn| 	 n| }|j
}t|d d }ttj|}	|  dk}
|
r||j
d  ||	|d }|
r|||}|r|j S |S |||S |dkr|dkr|dkr| dnd}| d}| j
d d }|dkr|dn|d}|dkr|dnd}g }t|d D ]
}||| q|dkr<|dkr<|d |d kr<|d dkr(|  r(t| d|S |d dkr<| r<t| |dS tt||}|||g }t|}| ||||}|dk}|rq||g }||||d}n|||g }|||||}|}|dkr|| |dkr|| |r||d|S |||S tddd	  d S )
Nr   r!   r"   rL   r*  r~   Fc                   S   rr  )Nz/both arguments to matmul need to be at least 1Dr)   r)   r)   r)   r*   rw   .  rs  zmatmul.<locals>.<lambda>)rJ   r_   rC  r'  r  r  rN   rR  r/  r  r   rA  r   operatorr   r  r   r9  r]  r  rM   r  r.  broadcast_shapesr  r  bmmry   )rI  rJ  dim_tensor1dim_tensor2rk  rN  rO  sizes_1output_shapefolded_dim1t2_is_matrix	t1_foldedr   rr  m1batch_tensor1m2r$  batch_tensor2r  expand_batch_portiontensor1_expand_sizeexpand_batch_producttensor1_expanded
vector_rhstensor2_expand_sizetensor2_expandedr)   r)   r*   r.    s   
	










r.  r  r  c                    s  j \}}|\}}ddd}	dd }
|	|||}|	|||}tj|jd|dddtj|jdd|dd tj|jddd|df}tj|jdddd|f}|
|||}| }|| |jtjd}|
|||}| }|| }|jtjd}|d ||d |d f}|d ||d |d f fd	d
fddtfdd|D }t	||}t
}|j|d}|S )Nc                 S   s@   |r|dkr| d |d  S dS |d ur|dkrd| S | | S )Nr!   r   r)   )r  r  r  re   r)   r)   r*   compute_scale=  s    z1upsample_bicubic2d_default.<locals>.compute_scalec                 S   s   |r| | S | |d  d S r  r)   )re   	dst_indexr  r)   r)   r*   r  C  s   z8upsample_bicubic2d_default.<locals>.compute_source_indexr5  r!   r   r"   c                    s8   t | dd }t |dd }t ||gS r  )r_   r   rz   r$  )r  r  y_idxx_idx)r#  r$  r]  r  r  r)   r*   load_bounded^  s   z0upsample_bicubic2d_default.<locals>.load_boundedc                    s"   t  fddD }t|S )Nc                 3   s    | ]} |V  qd S r5   r)   )r0   x_ofs)rl  rR   r)   r*   rx  d  s    zCupsample_bicubic2d_default.<locals>.get_x_interp.<locals>.<genexpr>)r  r  )rR   coeffs_x)ixs_ofsrl  t_x)rR   r*   get_x_interpc  s   
z0upsample_bicubic2d_default.<locals>.get_x_interpc                 3   r.  r5   r)   )r0   y_ofs)rq  r)   r*   rx  g  ry  z-upsample_bicubic2d_default.<locals>.<genexpr>r(  r5   )r   r_   rc  rb  r9  r	  r7   rd  r  r  r?   r  r]  )r]  r  r  r  r  r+  r,  r  r  rh  r  height_scalewidth_scaleout_yout_xreal_xin_xr  real_yin_yt_yr  iys_ofscoeffs_yr   r)  r)   )	r#  r$  r]  rq  r  r  ro  rl  rp  r*   upsample_bicubic2d_default1  s6   	


r~  r  c                 C   s   t t|t| dkdd  |d u r2|d usJ ttttf tdd t| jdd  |D }|r6|nd\}}t	| ||||S )Nr!   c                   S   rr  )Nz:Must specify exactly one of output_size and scale_factors.r)   r)   r)   r)   r*   rw   }  rs  z(upsample_bicubic2d_vec.<locals>.<lambda>c                 s   s$    | ]\}}t t|| V  qd S r5   )r   r   )r0   r  re   r)   r)   r*   rx    s
    
z)upsample_bicubic2d_vec.<locals>.<genexpr>r"   r4  )
r_   ry   r  r   r   r  r  r  r   r~  )r]  r  r  r  r  r  r)   r)   r*   upsample_bicubic2d_vecp  s   
r  r   r   r[  c                C   s(   t j| ||d}t j| ||d}||fS )Nr[  )r_   aminr  )rq   rJ   r\  r  r  r)   r)   r*   aminmax  s   r  r   c                C   s"   t jtt| d| |||dS )Nr   r   )rz   r   r_   r`   isnan)rq   rJ   r\  r   r)   r)   r*   nansum  s   "r  r   r  rb  r  r  c             	   C   s   t jjd| d||||dS )Nr   r!   r  rz   rc  
start_step)r;  r   r  rb  r  r)   r)   r*   arange_default     
r  c             	   C   s   t jj| |d||||dS )Nr!   r  r  )r:  r;  r   r  rb  r  r)   r)   r*   arange_start  r  r  marginc           	         s  t t jd jd  t |dkp|dkdd  t jdko, dkfdd t jdko? kfdd d urdt t jdko\  k fdd dt jdd	}||  }|	d}|dkr|n|| }d ur|  }t j
 jd
}t |k|d}|tjjkr| S |tjjkr| |jd  S |jddS )Nr   r!   r"   c                   S   rr  )Nz only p == 1 and p == 2 supportedr)   r)   r)   r)   r*   rw     rs  z#multi_margin_loss.<locals>.<lambda>c                      s   d j  S NzMExpected non-empty vector or matrix with optional 0-dim batch size, but got: r  r)   )r   r)   r*   rw     s    c                         d  dj  S )Nz#inconsistent target size, expected r  r  r)   )nframer   r)   r*   rw     r  c                      r  )Nz#inconsistent weight size, expected r  r  r)   )rJ   r   r)   r*   rw     r  rJ   rP  r5  ru   )r_   
atleast_2d
atleast_1dr   ry   rI  r   rN   r  r0  rc  rb  r`   r    r'   rr   r   r(   r   )	r   r   r$  r  r   r   ura   r  r)   )rJ   r   r  r   r   r*   multi_margin_loss  sB   







r  	is_targetc                    s  | j  |j t| } t|}| j d }tt dko |dk fdd ttdko2 k fdd tj||jd}|dk}tjt|||dd	d
}||k }t||d}tj	| d|d}	t||d}
tj
||
jddkdd}d|	jjdd |  }|d}|| }t|d|}|tjjkr|jdd }n|tjjkr| }n|jdd}|| j}||fS )Nr!   r"   r   c                      r  r  r)   r)   )orig_input_shaper)   r*   rw     r  z0multilabel_margin_loss_forward.<locals>.<lambda>c                      s   d d  S )Nzinconsistent target size: z for input of size: r)   r)   r  orig_target_shaper)   r*   rw     rx   r5  rL   Tr[  r  ru   r]   )r   rL   )r   r_   r  ry   rN  rc  rb  r  r`   r  anyrN   Tr0  r    r'   rr   r   r   r(   r7   r   r   )r   r   r   rJ   r  is_endend_idxtarget_masktidx0r  tidx1r  ra   r)   r  r*   multilabel_margin_loss_forward  s@   





r  c                    s   t |  fdd}|S )Nc                     s    | i |}| d  |S ri   )r   )rA   rB   r   outplace_opr)   r*   
inplace_op  s   z$register_inplace.<locals>.inplace_opr   )aten_opr  r  r)   r  r*   register_inplace  s   r  )F)r   )FNr  )r   NNr!   r5   r   )rL   FFrg  r  )r!   r!   F)r   r]   r  r4  )NNN)r   r   Frx  (A  rG   rS  rC  enumr   r   r   	itertoolsr   r   typingr   r   r	   r
   r   r   r   r_   torch._primsr  r  torch._prims_common_prims_commonr?   torch.nn.functionalr  rd  r   r   r   r   torch._decompr   r   r   r   r   torch._prims_common.wrappersr   r   r   r   %torch.fx.experimental.symbolic_shapesr   r   torch.utils._pytreer   r   r  DispatchKeyr   str__annotations___opsr  rz   r    r  r  rI   r  compute_only_pw_cast_for_opmathpw_cast_for_opmathINT_TO_FLOATpw_cast_for_int_to_realr  rP   rW   rY   rb   r  rm   fillScalarrs   r|   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r  py_implAutogradCUDA	Generatorr   r   r   r   r   r   r   r'   rr   r   r   r   r   r   r   r   r   r  r  r  r  r  r   r!  r"  r%  r8  r?  slicerO  rR  rW  rY  r_  r`  rn  r  r  r  r  r  r  CompositeImplicitAutogradAutogradr  r  r  rsubr  r  r  r  r  r  r  r  r  r   r%  r(  rG  rI  r`  rv  r{  r  unsafe_chunkr  r~  r}  no_statsr  r  _fused_dropoutr  r  r  r  rb  r)  detachlift
lift_freshr  r  r  r  r  _adaptive_avg_pool2dr  r  r  r  r  r  r  r  r   r
  r  r  r  r&  r  r  r  r  r  r"  r3  r;  r>  rA  rN  rV  rW  rZ  rj  rr  rnn_tanhr   ry  rnn_relur{  r  r  r  r  r  r  r  lstmr  r  r  r  grur  r  r  r  r  r  r  r  _unsafe_viewr$  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r=  r'  rC  rH  rR  r.  upsample_bicubic2dr~  r  r  r  rc  r  r  r  r:  r  r  r  r  addbmm_addbmmaddmm_addmv_baddbmm_baddbmmfill_gelu_r#  
hardswish_	hardtanh_hardtanhhardsigmoid___iand____and____ilshift__
__lshift__r  r  index_reduce_index_reduce__ior____or____irshift__
__rshift____ixor____xor__leaky_relu_r   logit_logitrelu_r$  renorm_renormround_r/  scatter_r   scatter_add_scatter_addscatter_reduce_scatter_reducesilu_r)   r)   r)   r*   <module>   s
  
 $

$



	




 *!	
2 
	P`
 
 
 (((
	
V	L	
R		#

	

	%	
c	"j""$$



	

	
.2
)


  ?
2
	
	


	

N 

5


&
,""


(
 



$w
=



,

0	