o
    œÜÓht[  ã                   @   sl  d dl mZmZ d dlZd dlZd dlZdZdZej	ddefd 
¡ Zej	dd	efd 
¡ Zej	dd	efd 
¡ Zej	dd	efd 
¡ ZejejfejejfgZd
dgZg d¢Zg d¢Zg d¢Zg d¢ZejejgZeeeeeeeeeeƒƒZeeeeeeeeeeƒƒZdd„ eD ƒZej j!deeddd„ ƒZ"dZdZej	ddefd 
¡ Zej	dd	efd 
¡ Zej	dd	efd 
¡ Zej	dd	efd 
¡ Ze #d ¡ ddgZ$ejejfejej%j&fgZddgZg d¢ZeeddgddƒZg ZeD ]Z'dZ(e'D ]Z)e)dkre(d7 Z(qúe(d 7 Z(qúe #e(¡ qôd!d"gZd#d$gZejej*ejgZddgZ+ddgZ,eeeeeeeeeee$e+e,ƒƒZeeeeeeeeeee$e+e,ƒƒZd%d„ eD ƒZej j!d&eedd'd(„ ƒZ-dZdZej	ddefd 
¡ Zej	dd	efd 
¡ Zej	dd	efd 
¡ Zej	dd	efd 
¡ Ze #d ¡ ejej.fgZdgZeeddgddƒZg ZeD ]Z'dZ(e'D ]Z)e)dkr½e(d7 Z(q°e(d 7 Z(q°e #e(¡ qªd!d"gZd#d$gZejejgZddgZ/ddgZ+ddgZ,d)d*gZ0eeeeeeeeeee,e/e0ƒƒZeeeeeeeeeee,e/e0ƒƒZd+d„ eD ƒZej j1ej2 3¡  d,d-ej j!d.eedd/d0„ ƒƒZ4ejej%j5fejej%j6fgZd1d2gZeeddgddƒZg ZeD ]Z'dZ(e'D ]Z)e)dkr[e(d7 Z(qNe(d 7 Z(qNe #e(¡ qHd!d"gZd#d$gZejejgZddgZ+eeeeeeeeeeƒƒZeeeeeeeeeeƒƒZd3d„ eD ƒZej j1ej2 3¡  d,d-ej j!deedd4d5„ ƒƒZ7dS )6é    )ÚpermutationsÚproductNé   é   é   é@   )Úsizeé    é`   ÚbmmÚmatmul)©FF©TF©TT©FT)ÚFFÚTFÚTTÚFT)r   r   r   r   )r   r   r   r   c                 C   ó   g | ]}d j |Ž ‘qS ©zNdim1_{}_dim2_{}_dim3_{}_dim4_{}_func_{}_dtype_{}_requires_grad_{}_transpose_{}©Úformat©Ú.0Úvals© r   úI/var/www/html/ai/venv/lib/python3.10/site-packages/tests/test_autograd.pyÚ
<listcomp>   s    ýÿÿr   z9dim1, dim2, dim3, dim4, funcs, dtype, req_grad, transpose)Úidsc                 C   sœ  t j ¡ s
t d¡ |dkr||d  }||d  }||d  }ttƒD ]¦}|d t jt jfv r¥|d s:||fn||f}	|d sF||fn||f}
t j	|	d|d d}t j	|
d|d d}t j	||fd|d d}t j
j |¡ |d sˆ|d sˆ|d ||ƒ}|d ||ƒ}nT|d s£|d r£|d || ¡ ƒ}|d || ¡ ƒ}n9|d r¾|d s¾|d | ¡ |ƒ}|d | ¡ |ƒ}n|d rÜ|d rÜ|d | ¡ | ¡ ƒ}|d | ¡ | ¡ ƒ}| ¡ }t j||ddd	}|dk ¡  ¡ |d
 k s÷J ‚t j||ddd	}|dk ¡  ¡ |d k sJ ‚t|ƒrS|j |¡ t j ¡  t j
j ||¡ ¡ }| ¡  |j}|j}d |_d |_t j
j ||¡ ¡ }| ¡  |j}|j}d |_d |_|d rbt jj||ddd	 |d r¥| ¡ }t j||ddd	}|dk ¡  ¡ |d k sƒJ ‚t j||ddd	}|dk ¡  ¡ |d k s›J ‚t jj||ddd	 |d t jt jfv ržt j	| ||fd|d d}t j	| ||fd|d d}t j	| ||fd|d d}t j
j |¡ |d ||ƒ}|d ||ƒ}| ¡ }t j||ddd	}|dk ¡  ¡ |d k sJ ‚t jj||ddd	 t|ƒrV|j |¡ t j ¡  t j
j ||¡ ¡ }| ¡  |j}|j}d |_d |_t j
j ||¡ ¡ }| ¡  |j}|j}d |_d |_|d ret jj||ddd	 |d rž| ¡ }t j||ddd	}|dk ¡  ¡ |d k s†J ‚t j||ddd	}|dk ¡  ¡ |d k sžJ ‚|d t jfv rË| | d  } t j	| ||fd|d d}|d rÃ||fn||f}
t j	|
d|d d}t j	| ||fd|d d}t j
j |¡ |d rý|d || ¡ ƒ}|d || ¡ ƒ}n|d ||ƒ}|d ||ƒ}| ¡ }t j||ddd	}|dk ¡  ¡ |d
 k s'J ‚t j||ddd	}|dk ¡  ¡ |d k s?J ‚t|ƒrƒ|j |¡ t j ¡  t j
j ||¡ ¡ }| ¡  |j}|j}d |_d |_t j
j ||¡ ¡ }| ¡  |j}|j}d |_d |_|d r’t jj||ddd	 |d rË| ¡ }t j||ddd	}|dk ¡  ¡ |d k s³J ‚t j||ddd	}|dk ¡  ¡ |d k sËJ ‚q$d S )NúNo GPU found.r   r   r   Úcuda)r   ÚdeviceÚrequires_gradç{®Gáz„?çš™™™™™¹?©ÚatolÚrtolçìQ¸…ë‘?çìQ¸…ë¡?çš™™™™™É?çü©ñÒMbP?ç¸…ëQ¸Ž?ç¸…ëQ¸®?ç333333Ó?ç{®Gáz”?ç
×£p=
Ç?gÙÎ÷Sã¥›?)Útorchr!   Úis_availableÚpytestÚskipÚrangeÚkÚmmr   ÚrandnÚnnÚinitÚxavier_uniform_ÚtÚnumelÚiscloseÚsumÚitemÚanyÚdataÚcopy_ÚsynchronizeÚ
functionalÚmse_lossÚmeanÚbackwardÚgradÚtestingÚassert_closer   )Údim1Údim2Údim3Údim4ÚfuncsÚdtypeÚreq_gradÚ	transposeÚiÚdimAÚdimBÚAÚBÚtargetÚ	out_torchÚout_bnbÚnÚidxÚloss_bnbÚgradA1ÚgradB1Ú
loss_torchÚgradA2ÚgradB2r   r   r   Útest_matmul%   s^  ÿ

ÿþ
ÿ
ÿýýýÿ

ÿþ
ÿ
ýý


ÿþ
ÿ
€ Êre   é   ç        ç      @ÚmatmulltÚswitchback_bnbTF)ÚrepeatÚ ÚTÚFr   r   ÚNTÚNNc                 C   r   )zxdim1_{}_dim2_{}_dim3_{}_dim4_{}_func_{}_dtype_{}_requires_grad_{}_transpose_{}_decomp_{}_has_fp16_weights_{}_has_bias_{}r   r   r   r   r   r   !  ó    z]dim1, dim2, dim3, dim4, funcs, dtype, req_grad, transpose, decomp, has_fp16_weights, has_biasc           &   	   C   sÖ  t j ¡ s
t d¡ |d s||fn||f}|d s||fn||f}t jd|d |d d fdd}|
dkr>t|ƒ}d|d< ttƒD ]%}|d t j	t j
fv rht j|d|d |d	}|d
kr{t  ¡  d
|d d …|f< W d   ƒ n1 svw   Y  t j|d|d |d	}t j||fd|d |d	}d }d }|
r¨t j|d||d d}| ¡ }t jj |¡ | ¡ }t ¡ }||_|	|_|	sá|d sÍ|d sÍ| ¡  ¡ }tj | t j¡¡\|_}|_}}|j}|d sý|d rý|d || ¡ ƒ}|d ||||d}n|d s|d s|d ||ƒ}|d || ¡ ||d}|
r!||7 }|j|jks4J d|j› d|j› ƒ‚| ¡ }t  || ¡  ¡  !¡ }t j"||ddd}|dk #¡  !¡ ||t jkr\dnd kscJ ‚t j"||ddd}|dk #¡  !¡ |d ks{J ‚|	rht$|ƒrÔ|j% &|¡ t j '¡  t jj (||¡  ¡ }| )¡  |j*}|j*} d |_*d |_*|
r±|j*}!d |_*t jj (||¡  ¡ }"|" )¡  |j*}#|j*}$d |_*d |_*|
rÔ|j*}%d |_*|d rãt j+j,||#ddd |d r\|  ¡ }|dkr
t  | ¡ #¡ dksýJ ‚t  |$¡ #¡ dks	J ‚nt  | ¡ #¡ dksJ ‚t  |$¡ #¡ dks"J ‚t j"| |$ddd}|dk #¡  !¡ |d ks:J ‚t j"| |$ddd}|dk #¡  !¡ |d ksRJ ‚t j+j,| |$ddd |d rht j+ ,|!|%¡ qBd S )Nr    r   r   é   r!   )r   r"   Fé   ©r   r"   r#   rR   rh   ©r"   rR   r#   )ÚstateÚbiasúbnb matmullt received ú but returned r$   r%   r&   r)   g/Ý$•?r*   r+   r,   r-   rg   r.   r/   r0   r1   )-r2   r!   r3   r4   r5   ÚrandintÚlistr6   r7   r8   r   r9   Úno_gradÚcloner:   r;   r<   ÚbnbÚMatmulLtStateÚ	thresholdÚhas_fp16_weightsr=   Ú
contiguousrF   Údouble_quantÚtoÚfloat16ÚCBÚSCBrR   r>   ÚabsrH   rA   r?   r@   rB   rC   rD   rE   rG   rI   rJ   rK   rL   )&rM   rN   rO   rP   rQ   rR   rS   rT   Údecompr   Úhas_biasrV   rW   Úoutlier_dimrU   rX   rY   rZ   rw   Úbias2ÚB2rv   ÚCBtÚSCBtÚcoo_tensorBr[   r\   r]   Úerrr^   r_   r`   ra   Ú	gradBias1rb   rc   rd   Ú	gradBias2r   r   r   Útest_matmullt$  sâ    ÿ
ÿÿüú&.

ÿþÿþ
ÿ

ÿ
€r”   Úfp4Únf4c                 C   r   )zdim1_{}_dim2_{}_dim3_{}_dim4_{}_func_{}_dtype_{}_requires_grad_{}_transpose_{}_has_bias_{}_compress_statistics_{}_quant_type_{}r   r   r   r   r   r   Ï  rq   zthis test requires a GPU)Úreasonzddim1, dim2, dim3, dim4, funcs, dtype, req_grad, transpose, has_bias, compress_statistics, quant_typec           !      C   s¼  |d s||fn||f}|d s||fn||f}|dkr$t |ƒ}d|d< ttƒD ]2}|d tjtjfv r[tj|d|d |d}tj|d|d |d}tj||fd|d |d}d }d }|rntj|d||d d}| ¡ }tjj	 
|¡ tjj||	|
d\}}|d sž|d rž|d || ¡ ƒ}|d || ¡ ||d	}n|d s·|d s·|d ||ƒ}|d ||||d	}|r½||7 }|j|jksÏJ d
|j› d|j› ƒ‚| ¡ }t || ¡ ¡  ¡  ¡ }|dkrê|dk sêJ ‚t|ƒr[|j |¡ tj ¡  tjj ||¡ ¡ }| ¡  |j}|j}d |_d |_|r|j}d |_tjj ||¡ ¡ }| ¡  |j}|j}d |_d |_|r@|j} d |_|d rOtjj||ddd |d r[tj || ¡ q(d S )Nr   r   Frs   r!   rt   ru   )Úcompress_statisticsÚ
quant_type)rw   rx   ry   çq=
×£p½?r-   r%   r&   )r{   r6   r7   r2   r8   r   r9   r}   r:   r;   r<   r~   rF   Úquantize_4bitr=   rR   r>   rˆ   ÚfloatrH   rA   rB   rC   rD   r!   rE   rG   rI   rJ   rK   rL   )!rM   rN   rO   rP   rQ   rR   rS   rT   rŠ   r˜   r™   rV   rW   rU   rX   rY   rZ   rw   rŒ   r   Úquant_stater[   r\   r]   r‘   r_   r`   ra   r’   rb   rc   rd   r“   r   r   r   Útest_matmul_4bitÐ  sr   $



€Ärž   Úmatmul_fp8_mixedÚmatmul_fp8_globalc                 C   r   r   r   r   r   r   r   r   )  rq   c                 C   sp  |d s||fn||f}|d s||fn||f}	t |ƒ}d|d< ttƒD ]}
|d tjtjfv rµtj|d|d |d}tj|	d|d |d}tj||fd|d |d}tjj 	|¡ t
j ddd	d
¡ |j¡}t
j dddd
¡ |j¡}|d s“|d r“|d || ¡ ƒ}|d || ¡ ||ƒ}n|d s«|d s«|d ||ƒ}|d ||||ƒ}|j|jks½J d|j› d|j› ƒ‚| ¡ }t || ¡ ¡  ¡  ¡ }|dkrØ|dk sØJ ‚t|ƒrµ|j |¡ tj ¡  tjj ||¡ ¡ }| ¡  |j}|j}d |_d |_tjj ||¡ ¡ }| ¡  |j}|j}d |_d |_|d r+tjj||ddd |d rµ| ¡ }|dkrRt |¡  ¡ dksEJ ‚t |¡  ¡ dksQJ ‚nt |¡  ¡ dks^J ‚t |¡  ¡ dksjJ ‚tj!||ddd}|dk  ¡  ¡ |d ks‚J ‚tj!||ddd}|dk  ¡  ¡ |d ksšJ ‚||  ¡  ¡ }| ¡ dk s«J ‚tjj||ddd q$d S )Nr   r   Frs   r!   rt   Té   rf   rr   é   rx   ry   rš   r-   r%   r&   rg   r.   r/   r0   gú~j¼t“h?r1   )"r{   r6   r7   r2   r8   r   r9   r:   r;   r<   r~   rF   Úcreate_fp8_mapr„   r"   r=   rR   r>   rˆ   rœ   rH   rA   rB   rC   rD   r!   rE   rG   rI   rJ   rK   rL   r@   r?   )rM   rN   rO   rP   rQ   rR   rS   rT   rV   rW   rU   rX   rY   rZ   Úfw_codeÚbw_coder[   r\   r]   r‘   r_   r`   ra   rb   rc   rd   r^   Úgrad_errr   r   r   Útest_matmul_fp8*  st   $




ÿ€Ãr§   )8Ú	itertoolsr   r   r4   r2   Úbitsandbytesr~   r]   r7   rz   ÚtolistrM   rN   rO   rP   r   Ú
bmm_cublasr   Úmatmul_cublasrQ   Ú	str_funcsrS   Úreq_grad_strrT   Ústr_transposeÚfloat32r…   rR   r{   ÚvaluesÚ
str_valuesÚnamesÚmarkÚparametrizere   Úappendr‰   Úresearchrj   ÚcÚstrvalÚvÚbfloat16r   rŠ   r”   Úmatmul_4bitr˜   r™   Úskipifr!   r3   rž   rŸ   r    r§   r   r   r   r   Ú<module>   s&   ÿÿÿüý
 ?

õÿõÿý
 

  F