o
    œÜÓh'  ã                   @   s
  d dl Z d dlmZ d dlmZ d dlmZ d dlZd dlZd dl	Z
d dl	mZ d dlmZmZ d dlmZ ejjej ¡  pEej ¡ dk d	d
dd„ ƒZejjej ¡  dd
dd„ ƒZejjej ¡  dd
ej deeddgddgddgddgƒƒ¡dd„ ƒƒZdS )é    N)Únullcontext)Úproduct)ÚTemporaryDirectory)Ú
functional)Úget_inverse_transform_indicesÚundo_layout)ÚLinear8bitLt)é   é   zJthis test requires a turing-generation or newer GPU, see bitsandbytes docs)Úreasonc                     sŒ   t  dd¡d  t j¡ ¡ } dD ]3\}‰ ‡ fdd„}t||ƒ}|| ƒ}t j ¡  t||ƒ}t j ¡  | ¡ s8J ‚t  	t  
|| ¡¡sCJ ‚qd S )Ni ¨  i 8  é
   )))é   é    Ú
col_turing))r   r   Ú
col_amperec                    s    t j|  ¡ dˆ dd  | j¡S )NÚrow)Ú
from_orderÚto_orderr   )ÚFÚ	transformÚcudaÚtoÚdevice)Úx©Úorder© úM/var/www/html/ai/venv/lib/python3.10/site-packages/tests/test_linear8bitlt.pyÚ<lambda>   s     z)test_layout_exact_match.<locals>.<lambda>)ÚtorchÚrandnr   Úint8r   r   Úsynchronizer   Úis_contiguousÚallÚeq)r   Ú	tile_sizer   Útile_indicesÚcxbÚ
restored_xr   r   r   Útest_layout_exact_match   s   



÷r*   zthis test requires a GPUc                  C   sH  t j dd¡} t jddt jd}t| j| j| jd uddd}d|j	_
tjj| jj ¡ ddd	 | jj¡|_| j|_| ¡ }|  ¡  ¡ } | ¡  ¡  d¡}| ¡  ¡  d¡}| |ƒ ¡ }t  |¡}||  ¡  ¡  ||ƒ ¡ }||  ¡  ¡  t j||d
ds€J ‚t j|j|jddsŒJ ‚|j	jr’J ‚|j	jd usšJ ‚|j	jd u s¢J ‚d S )Ni   i   é   ©ÚdtypeFç      @©Úhas_fp16_weightsÚ	thresholdT©Úrequires_gradr0   g{®Gáz”?©Úatolg{®Gáz„?)r   ÚnnÚLinearr    Úhalfr   Úin_featuresÚout_featuresÚbiasÚstateÚforce_no_igemmltÚbnbÚ
Int8ParamsÚweightÚdataÚcloner   r-   r   Úrequires_grad_ÚfloatÚ
randn_likeÚmeanÚbackwardÚallcloseÚgradr0   ÚCBÚCxB)Úlinearr   Úlinear_customÚx_refÚx_oursÚfx_refÚ	grad_projÚfx_oursr   r   r   Útest_linear_no_igemmlt$   s<   ûÿ
þ
rS   zUhas_fp16_weights, serialize_before_forward, deserialize_before_cuda, force_no_igemmltFTc                 C   sJ  t j dd¡}t jddt jd}t|j|j|jd u| dd}|r$d|j	_
tjj|jj ¡ | | d|_|j|_| ¡ }|r@| ¡ }| ¡  ¡  d¡}||ƒ ¡ }	t  |	¡}
|	|
  ¡  ¡  |sb| ¡ }tƒ ;}tj |d	¡}tj |d
¡}t  | ¡ |¡ t  ||¡ | s”tj |¡dtj |¡ k s”J ‚t  |¡}W d   ƒ n1 s£w   Y  t|j|j|jd u| dd}|r¼d|j	_
|rß| rÃtƒ nt  t!¡ |j"|dd W d   ƒ n1 sÚw   Y  | ¡ }|sì|j"|dd | ¡  ¡  d¡}||ƒ ¡ }||
  ¡  ¡  | s	|s!t j#|	|ddsJ ‚t j#|j$|j$dds#J ‚d S d S )Nr   é`   r+   r,   r.   r/   Tr2   zstate_8bit.pthz	state.pthg      à?)Ústrictgñhãˆµøä>r4   )%r   r6   r7   r    r8   r   r9   r:   r;   r<   r=   r>   r?   r@   rA   rB   r   Ú
state_dictrC   rD   rE   rF   rG   r   ÚosÚpathÚjoinÚsaveÚgetsizeÚloadr   ÚpytestÚraisesÚRuntimeErrorÚload_state_dictrH   rI   )r0   Úserialize_before_forwardÚdeserialize_before_cudar=   rL   r   rM   Ústate_dict_8bitÚx_firstÚfx_firstrQ   ÚtmpdirÚstate_path_8bitÚ
state_pathÚnew_state_dictÚnew_linear_customÚx_secondÚ	fx_secondr   r   r   Útest_linear_serializationG   sr   ûÿ
 öûÿþrm   )rW   Ú
contextlibr   Ú	itertoolsr   Útempfiler   r]   r   Úbitsandbytesr>   r   r   Úbitsandbytes.autogradr   r   Úbitsandbytes.nn.modulesr   ÚmarkÚskipifr   Úis_availableÚget_device_capabilityr*   rS   ÚparametrizeÚlistrm   r   r   r   r   Ú<module>   s,    þ

" ÿ