o
    œÜÓh>  ã                   @   s  d dl Z d dlZd dlmZ d dlmZ d dlZd dlmZ	 d dl
mZmZ dd„ ZejZG dd„ dejjƒZG d	d
„ d
ejjƒZG dd„ dejjƒZdd„ Zddededededededefdd„Zddededededededefdd„Z				d dedededefdd„ZdS )!é    N)Ú	dataclass)Úreduce)ÚMatmulLtStateÚGlobalOutlierPoolerc                 C   s   t tj| dƒS )Né   )r   ÚoperatorÚmul)Úiterable© r
   ú_/var/www/html/ai/venv/lib/python3.10/site-packages/bitsandbytes/research/autograd/_functions.pyÚprod   s   r   c                   @   ó&   e Zd Zeddd„ƒZedd„ ƒZdS )ÚMatMulFP8MixedNé   c                 C   sH  d| _ t|jƒdkrId| _ || _|| _|j}|jd |d kr4tj|jd d… |dd …  |j|jdS tj|jd d… |d d…  |j|jdS t	j
|||d\}	}
t	j|	|
|d |j¡}t	j| ¡ |d	\}}
t	 ||
¡ |j¡}t ||¡}|| _|| _|| _|| _|j|j| _| _t| jd d
… ƒrŸ||f| _|S d| _|S )NFr   Téÿÿÿÿr   ©ÚdtypeÚdevice©ÚcodeÚ	blocksize©r   ©r   é   ©NN)Úis_emptyr   ÚshapeÚAÚBÚtorchÚemptyr   r   ÚFÚquantize_blockwiseÚdequantize_blockwiseÚtoÚquantizeÚfloatÚ
dequantizeÚmatmulÚfw_codeÚbw_codeÚbszÚbsz2Údtype_AÚdtype_BÚanyÚneeds_input_gradÚtensors©Úctxr   r   Úoutr)   r*   r+   r,   ÚB_shapeÚcAÚstateÚfp8AÚcBÚfp8BÚoutputr
   r
   r   Úforward   s0   **
þzMatMulFP8Mixed.forwardc                 C   s  | j rt | j¡t | j¡d d d d d fS | j\}}}}}}}| j\}}d\}}tj|| j	| j
d\}	}
tj|	|
| j
d |j¡}|rRt || ¡  |j¡¡ |j¡}|rzt|jƒdkrd| dd¡ ¡ }n| dd¡ ¡ }t | |j¡|¡ |j¡}||d d d d d fS )Nr   r   r   é   r   r   r   )r   r   Ú
zeros_liker   r   r0   r1   r!   r"   r*   r,   r#   r$   r   r(   ÚtÚlenr   Ú	transposeÚ
contiguous)r3   Úgrad_outputÚ	req_gradAÚ	req_gradBÚ_r   r   Úgrad_AÚgrad_BÚ	cgrad_outr7   Úfp8outÚAtr
   r
   r   ÚbackwardA   s   "
 zMatMulFP8Mixed.backward©NNNr   r   ©Ú__name__Ú
__module__Ú__qualname__Ústaticmethodr<   rL   r
   r
   r
   r   r      ó
    )r   c                   @   r   )ÚMatMulFP8GlobalNr   c                 C   sF  d| _ t|jƒdkrId| _ || _|| _|j}|jd |d kr4tj|jd d… |dd …  |j|jdS tj|jd d… |d d…  |j|jdS t	j
| ¡ |d\}	}
t	 |	|
¡ |j¡}t	j
| ¡ |d\}}
t	 ||
¡ |j¡}t ||¡}|| _|| _|| _|| _|j|j| _| _t| jd d… ƒrž||f| _|S d	| _|S )
NFr   Tr   r   r   r   r   r   )r   r   r   r   r   r   r    r   r   r!   r%   r&   r'   r$   r(   r)   r*   r+   r,   r-   r.   r/   r0   r1   r2   r
   r
   r   r<   k   s0   **
þzMatMulFP8Global.forwardc                 C   s,  | j rt | j¡t | j¡d d d d d fS | j\}}}}}}}| j\}}d\}}tj| 	¡ | j
d\}	}
t |	|
¡ |j¡}|rOt || ¡  |j¡¡ |j¡}|rt|jƒdkra| dd¡ ¡ }n| dd¡ ¡ }tj| 	¡ | jd\}}
t ||
¡ |j¡}t | |j¡|¡ |j¡}||d d d d d fS )Nr   r   r=   r   r   r   )r   r   r>   r   r   r0   r1   r!   r%   r&   r*   r'   r$   r   r(   r?   r@   r   rA   rB   r)   )r3   rC   rD   rE   rF   r   r   rG   rH   rI   r7   rJ   rK   r6   Úfp8Atr
   r
   r   rL   •   s"   "
 zMatMulFP8Global.backwardrM   rN   r
   r
   r
   r   rT   g   rS   rT   c                   @   s.   e Zd Zeddeƒ fdd„ƒZedd„ ƒZdS )ÚSwitchBackBnbNc                 C   sà  d| _ t|jƒdkrLd| _ || _|| _|| _|jd |jd kr6tj|jd d… |jdd …  |j|j	dS tj|jd d… |jd d…  |j|j	dS |j
}|j}|jd u r\t ¡ |_|jtjkrlt d|j› d¡ t|jƒd	kr~| d|jd ¡ ¡ }tj| tj¡|jd
\}}	}
}}|jdkrÞ|d urÞ|jrÌt |j¡ ¡ }d|d d …|f< d|	d d …|f< |d d …|f }|d d …|f  ¡  ¡ |_||_n(|jd u rÝtj |j!|d\|_|_"n|jsò|jd u ròtj |j!|d\|_|_"d }|jrJt#|dd ƒd urdnd}| $¡  o|jd | %d¡k}|r| ¡ }|j&r#|r)|jd u rI| '¡  t | tj¡¡\}|_(|_)|_*}tj ||d\|_|_"nd}|d urŸ|jsŸt |j¡}||_t +|j|j"|j ,¡ ¡}||j) dd¡ d  ¡  ¡  |j¡|_d|d d …|j ¡ f< d|	d d …|j ¡ f< |d d …|j ¡ f }|j"d }t|ƒd	kr·|d |d |d f}n|d |d f}t  |d¡\}}t -||j||j"¡\}}|d u sß|jtjkrñtj.|||
|j)|d}| |j¡}ntj.|||
|j)d d}| |j¡ /|¡}|d ur|d ur|t 0||j¡7 }|| _1|| _
|| _2|j|j|d u r,d n|j| _3| _4| _5t6| j7d d… ƒrM|	||f| _8||jf| _9ng d¢| _8d| _9|  :d d ¡ t|ƒd	kretj;ndd„ }|| |¡ƒS )NFr   Tr   r   r   z'MatMul8bitLt: inputs will be cast from z to float16 during quantizationr=   )Ú	thresholdç        )Úto_orderÚgradg     À_@Úcol32)Úbiasr   )NNNr   c                 S   s   | S )Nr
   )Úxr
   r
   r   Ú<lambda>D  s    z'SwitchBackBnb.forward.<locals>.<lambda>)<r   r   r   r   r   r\   r   r    r   r   ÚformatBÚoutlier_poolr   Úget_instanceÚfloat16ÚwarningsÚwarnr@   ÚviewrB   r!   Údouble_quantr$   rW   Úhas_fp16_weightsÚuniqueÚcolidxÚlongr?   ÚsubBÚidxÚCxBÚ	transformÚCBÚSBÚgetattrÚis_contiguousÚstrideÚis_trainingÚreset_gradsÚCBtÚSCBÚSCBtÚextract_outliersÚintÚigemmltÚ
mm_dequantÚadd_r(   r7   Ú
grad_shaper-   r.   Ú
dtype_biasr/   r0   r1   Útensor_statesÚsave_for_backwardÚclone)r3   r   r   r4   r\   r7   r_   Úinput_shapeÚCAÚCAtÚSCAÚSCAtÚcoo_tensorArl   ÚsubAÚhas_gradÚis_transposedro   Úcoo_tensorBÚoutlier_idxÚoutliersÚshapeBÚoutput_shapeÚC32AÚSAÚout32ÚSout32r;   Ú
clone_funcr
   r
   r   r<   ¼   s°   ,,

ÿ
€ ú€ü
*
zSwitchBackBnb.forwardc                 C   sÆ  | j r| jd u r
d nt | j¡}t | j¡t | j¡d |d fS | j\}}}}}| j\}}}	| j\}
}| j	}| j
}d  } }}|rH|jd| jd}t|jƒdkrZ| d|jd ¡ ¡ }t | tj¡¡\}}}}}|rrt | ¡ |	¡}|rÜ|jd ur±t |d¡\}}|jd u r“tj|j|dd\|_|_t ||j||j¡\}}t ||||j¡ | j¡ | j ¡}n+|j!d urØ|j!j| j dd "|j# $d	¡ %d
¡¡}t ||¡ | j¡ | j ¡}nt&dƒ‚||d |d fS )Nr   )r   r=   r   r[   T)rY   rA   )Úcopyr   g@ €?z7State must contain either CBt or CB matrix for backward)'r   r\   r   r>   r   r   r0   r1   r€   r_   r7   Úsumr   r@   r   ÚreshaperB   r!   rf   r$   rb   r(   r?   rv   rn   ÚCxBtÚSBtr{   r|   rx   re   r~   r-   ro   Úmul_rw   Ú	unsqueezer   Ú	Exception)r3   rC   Ú	bias_gradrD   rE   rF   Úreq_gradBiasr…   r‰   r   r‡   rl   r_   r7   rG   rH   Ú	grad_biasÚCgradÚCgradtÚSCgradÚSCgradtÚ
coo_tensorÚC32gradÚSgradÚgradA32ÚSgradA32ro   r
   r
   r   rL   G  sD   

ÿþ

ÿ$
&zSwitchBackBnb.backward)rO   rP   rQ   rR   r   r<   rL   r
   r
   r
   r   rV   »   s     rV   c           	      C   sž   | j d }|j d |kr|j d n|j d }g d¢}d\}}t|ƒD ]\}}|||d  kr2|} nq"t|ƒD ]\}}|||d  krJ|} ||fS q7||fS )Nr   r   r   )i   i   r   i   é   é€   é@   r   )r   r   )r   Ú	enumerate)	Úinput_matrixÚweight_matrixÚinput_featuresÚoutput_featuresÚarrayr+   r,   ÚiÚkr
   r
   r   Úget_block_sizesx  s    
"þürµ   r   r   r   r)   r*   r4   r+   r,   c              	   C   ó4   |dks|dkrt | |ƒ\}}t | ||||||¡S ©Nr   )rµ   rT   Úapply©r   r   r)   r*   r4   r+   r,   r
   r
   r   Úmatmul_fp8_globalˆ  ó   rº   c              	   C   r¶   r·   )rµ   r   r¸   r¹   r
   r
   r   Úmatmul_fp8_mixedŒ  r»   r¼   rX   r7   c                 C   s*   |pt ƒ }|dkr||_t | ||||¡S )NrX   )r   rW   rV   r¸   )r   r   r4   r7   rW   r\   r
   r
   r   Úswitchback_bnb  s   
r½   )Nr   r   )NNrX   N)r   rc   Údataclassesr   Ú	functoolsr   r   Úbitsandbytes.functionalÚ
functionalr!   Ú bitsandbytes.autograd._functionsr   r   r   ÚTensorÚtensorÚautogradÚFunctionr   rT   rV   rµ   rz   rº   r¼   r½   r
   r
   r
   r   Ú<module>   s:    TT >((úÿþýü