o
    hz                     @   s   d dl Z ddlmZmZ ddlmZ ddlmZ dd Zeddd	 ied
dd	 ied
ejfddZ	eddd	 ied
dd	 ied
ejfddZ
G dd de jjZejZdS )    N   )
heuristicsjit)languagenext_power_of_2c                 C   s   | dk rdS | dk rdS dS )Ni      i           )Nr   r   N/var/www/html/ai/venv/lib/python3.10/site-packages/triton/ops/cross_entropy.py	num_warps   s
   r   c                 C      t | d S Nr   r   nargsr   r   r   <lambda>       r   BLOCKc                 C   r   r   r   r   r   r   r   r      r   c                 C   s   t d}t d|}t || }| ||  | } |||  | }	|||  | }
t j| ||k td d}|t j}|t |d }t t 	t 
|d| }t j|	|||k d t   t |
}t || | d S Nr   inf)maskother)r   )tl
program_idarangeloadfloattofloat32maxlogsumexpstoredebug_barrier)LOGITSPROBSIDXLOSSr   r   rowcolsidx
WRIT_PROBS
READ_PROBSlogitsprobsr   r   r   _forward   s   

r3   c                 C   r   r   r   r   r   r   r   r   )   r   c                 C   r   r   r   r   r   r   r   r   *   r   c                 C   s   t d}t d|}t || }| ||  | } t j| ||k tdd }t |t j}||k}	t || }
||	 |
 }t j| || j	j
||k d d S r   )r   r   r   r   r   r%   r    r!   r&   dtype
element_ty)r)   r*   DPROBSr   r   r,   r-   r.   r2   deltadoutdinr   r   r   	_backward)   s   
"r:   c                   @   s$   e Zd Zedd Zedd ZdS )_cross_entropyc           	         s~   |j tjks
J d j j }} jd tj|||d}tj ||d} fdd}t|  ||| ||| |S )Nz(Indices are expected to be of type long.)r4   devicec                    s       fS Nnumeloptr1   n_colsr   r   r   H       z(_cross_entropy.forward.<locals>.<lambda>)r4   torchint64r=   shape
empty_liker3   save_for_backward)	clsctxr1   indicesr=   r4   resultneg_logprobsgridr   rC   r   forward>   s   
z_cross_entropy.forwardc                    s<   |j \}jd   fdd}t| ||  dfS )a  We know d(-log(p[i])/dlogit[k] = -id_mat[i,k] + p[k]
        so we initialize the gradient as neg_logprobs, so we can just exponentiate
        to get p[k], which is most of what we need...  neg_logprobs will be
        modified in place to become the gradient we want
        r<   c                    s       fS r>   r?   rA   rD   rO   r   r   r   Z   rE   z)_cross_entropy.backward.<locals>.<lambda>N)saved_tensorsrH   r:   )rK   rL   dneg_logprobsrM   rP   r   rR   r   backwardN   s
   

z_cross_entropy.backwardN)__name__
__module____qualname__classmethodrQ   rU   r   r   r   r   r;   =   s
    
r;   )rF    r   r   r   r   r   r   	constexprr3   r:   autogradFunctionr;   applycross_entropyr   r   r   r   <module>   s    
"