o
    œÜÓhZ  ã                   @   sì  d dl mZ d dlZd dlZd dlmZ d dlZG dd„ dƒZG dd„ dejjƒZ	dd	„ Z
d8dd„ZG dd„ dejjƒZG dd„ dejƒZddgZeZdd„ eD ƒZejjdeeddd„ ƒZdd„ Zej dddg¡ej ddg¡d d!„ ƒƒZejjd"d9d$d%„ejjgd&d'gdd(d)„ ƒZg Ze ejj¡ e ejj¡ e ejj¡ e ejj¡ e d*d%„ ¡ e d+d%„ ¡ e d,d%„ ¡ e d-d%„ ¡ e d.d%„ ¡ g d/¢Zejjej  !¡  d0d1ejjd"eedd2d3„ ƒƒZ"d4d5„ Z#d6d7„ Z$dS ):é    )ÚproductN)Únnc                   @   s   e Zd Zdd„ ZdS )ÚMockArgsc                 C   s   |D ]
}t | ||| ƒ qd S ©N)Úsetattr)ÚselfÚinitial_dataÚkey© r
   úH/var/www/html/ai/venv/lib/python3.10/site-packages/tests/test_modules.pyÚ__init__   s   ÿzMockArgs.__init__N)Ú__name__Ú
__module__Ú__qualname__r   r
   r
   r
   r   r   
   s    r   c                       s&   e Zd Zd‡ fdd„	Zdd„ Z‡  ZS )	ÚMLP8bitTFç        c                    s>   t ƒ  ¡  tjj|||||d| _tjj|||||d| _d S )N)Úhas_fp16_weightsÚmemory_efficient_backwardÚ	threshold)Úsuperr   Úbnbr   ÚLinear8bitLtÚfc1Úfc2)r   Údim1Údim2r   r   r   ©Ú	__class__r
   r   r      s   
þþzMLP8bit.__init__c                 C   s   |   |¡}|  |¡}|S r   )r   r   ©r   Úxr
   r
   r   Úforward   s   

zMLP8bit.forward)TFr   ©r   r   r   r   r    Ú__classcell__r
   r
   r   r   r      s    r   c                  C   s   t g ƒ} d| _d| _d| _| S )NÚvectorÚfulli'  )r   Ú
quant_typeÚuse_8bit_trainingÚ	clip_freq)Úargsr
   r
   r   Úget_args"   s
   r)   ç:Œ0âŽyE>çñhãˆµøä>é
   c                 C   sV   t  | |||¡}|dk ¡  ¡ }||kr)td|› d|› ƒ t j | |||¡ d S d S )Nr   z"Too many values not close: assert z < )ÚtorchÚiscloseÚsumÚitemÚprintÚtestingÚassert_close)ÚaÚbÚatolÚrtolÚcountÚidxÚsumvalr
   r
   r   Úassert_all_approx_close*   s   þr;   c                   @   s¤   e Zd Zed dd„ƒZd!dd„Zdd	„ Zd
d„ Zd"dd„Zed"dd„ƒZ	edd„ ƒZ
edd„ ƒZedd„ ƒZedd„ ƒZed#dd„ƒZed$dd„ƒZedd„ ƒZdS )%ÚLinearFunctionFç      @c                 C   st   |rt jntj}t tj¡t d¡ }t | ¡}|| }| | d } || ƒ} d| | dk< d| | dk < | d | } | S )Nç       @é   iÿÿÿ)r<   Úround_stoachasticr-   ÚroundÚmathÚsqrtÚpiÚstd)r   Ú
stochasticÚ
trim_valueÚ
round_funcÚnormrE   Úmax1r
   r
   r   Úget_8bit_linear_trimmed3   s   ÿ
z&LinearFunction.get_8bit_linear_trimmedé   c                 C   sò   |dkrt  | ¡ ¡  ¡ }t  | | d ¡ t j¡}||fS |dkr>t jt  | ¡|dd}t  | | d ¡ t j¡}||fS |dkrwt j| |dd ¡ }t j| |dd ¡ }|| d }t  d| | |  | ¡ t j¡}|| ¡ | ¡ ffS d S )NÚlinearr?   r#   T©ÚdimÚkeepdimzmin-maxr>   )	r-   ÚabsÚmaxÚfloatrA   ÚtoÚint8ÚamaxÚamin)r   r%   rO   rJ   ÚxqÚmaxAÚminAÚscaler
   r
   r   ÚquantD   s   "zLinearFunction.quantc                 C   sÈ   |dkr|| d }|   ¡ |  |¡S |dkrb|   ¡ }t| jƒdkr.t|jƒdkr.| d¡}t| jƒdkrAt|jƒdkrA| d¡}t|jƒdkrQ|| ¡ d 9 }n||d 9 }||d 9 }| |¡S d S )NrM   i?  r#   é   é   r   r?   )rS   rT   ÚlenÚshapeÚsqueezeÚt)rX   ÚS1ÚS2Údtyper%   rI   r   r
   r
   r   ÚdequantV   s   


zLinearFunction.dequantc                 C   sÄ   |  ¡  ¡  d¡|d |d   }|   ¡ }t| jƒdkr(t|jƒdkr(| d¡}t| jƒdkr;t|jƒdkr;| d¡}t|jƒdkrK|| ¡ d 9 }n||d 9 }||d d 9 }||7 }| |¡S )Nr   rL   r]   r^   r?   )rS   rb   r/   r_   r`   ra   rT   )rX   ÚAÚBÚSAÚSBre   Úoffsetr   r
   r
   r   Údequant_min_maxk   s   "


zLinearFunction.dequant_min_maxc                 C   s>   |rt jntj}t | ¡ ¡ }| | d } || ƒd | } | S )Nr?   )r<   r@   r-   rA   rQ   rR   )r   rF   rH   rJ   r
   r
   r   Úget_8bit_linearz   s   ÿzLinearFunction.get_8bit_linearc                 C   sR   |rt jntj}tjt | ¡|dd}d||dk< | d | } || ƒd | } | S )NTrN   g      ð?r   r?   )r<   r@   r-   rA   rV   rQ   )r   rO   rF   rH   rJ   r
   r
   r   Úget_8bit_vector_wise„   s   ÿz#LinearFunction.get_8bit_vector_wisec                 C   sJ   t  | ¡}t  | ¡}|t  |¡ }t  |¡}|t  |¡||k  | j¡  S r   )r-   ÚsignrQ   ÚfloorÚ	rand_likerT   re   )r   ro   ÚabsxÚdecimalÚrdmr
   r
   r   r@      s
   


z LinearFunction.round_stoachasticc                 C   sR   t jj|d | j¡}t jj| j|d\}}t j |||¡}| ¡ }|  	|¡ |S )N)Ún©Úcode)
r   Ú
functionalÚcreate_dynamic_maprT   ÚdeviceÚquantize_blockwiseÚdataÚdequantize_blockwiseÚhalfÚcopy_)ÚwÚexponent_bitsrw   ÚabsmaxÚCÚoutr
   r
   r   Úfake_8bit_storage—   s   
z LinearFunction.fake_8bit_storagec                 C   sd   t jj| j|jd}|t t |¡¡ }t jj| j|d\}}t j 	|||¡}| 
¡ }|  |¡ |S )N)rk   rv   )r   rx   Úestimate_quantilesr|   rk   r-   rR   rQ   r{   r}   r~   r   )r€   r(   rw   r‚   rƒ   r„   r
   r
   r   Úfake_8bit_storage_quantile    s   
z)LinearFunction.fake_8bit_storage_quantilec                 C   sJ   t jd| jd}tjj| j|d\}}tj ||¡}| ¡ }|  	|¡ |S )Né   ©rz   )Úrand)
r-   rŠ   rz   r   rx   r{   r|   r}   r~   r   )r€   rŠ   r‚   rƒ   r„   r
   r
   r   Úfake_8bit_storage_stoachstic®   s   
z+LinearFunction.fake_8bit_storage_stoachsticé   c                 C   sî   t j|  ¡ ddd}tjt |¡ddd\}}|d d …d |…f }|d d …d |…f }t |¡}|jd|t |¡d | 	¡ }|| }d||< t
j ¡ }| | j¡}t
j |j¡\}}	t
jj||	|d	 |||< | ¡  | j¡}
|  |
¡ |
S )
Nz(h b) -> h bé   )r5   rL   T)rO   Ú
descending)rO   ÚindexÚsrcr   ©r„   )ÚeinopsÚ	rearrangeÚflattenr-   ÚsortrQ   Ú
zeros_likeÚscatter_Ú	ones_likeÚboolr   rx   ry   rT   rz   r{   r|   r}   Úviewr`   r   )r€   ÚtopkÚ	blocked_wÚmax_valr9   ÚmaskÚvaluesrw   r‚   rƒ   Úunblocked_wr
   r
   r   Úfake_8bit_storage_with_max·   s"   


z)LinearFunction.fake_8bit_storage_with_maxNc                 C   s¢   |j dkr0tj||jdd\}}tj||jdd\}}tj || ¡ ¡}	t |	|||j	|j¡}
nt
 d||¡}
|  |||¡ || _|d urO|
| d¡ |
¡7 }
|
S )NÚoffrL   ©rO   r]   zbsi,oi->bsor   )r&   r<   r\   r%   r   rx   Úigemmrb   rf   re   r-   ÚeinsumÚsave_for_backwardr(   Ú	unsqueezeÚ	expand_as)Úctxr   ÚweightÚbiasr(   Úweight8rc   Úx8rd   ÚoutputqÚoutputr
   r
   r   r    Ö   s   
ÿ
zLinearFunction.forwardc                 C   s˜  | j \}}}| j}d}d  } }}	|d ur| jd r| d¡}	|jdkrVtj||jddgd\}
}tj||jddgd\}}tj	 
|
|¡}t ||||j|j¡}| |¡}np|jdkrºtj||jddgd\}
}tj||jddgd\}}tj|tjd}tj	j
|
||d	 t ||||j|j¡}tj||jdd\}
}tj||jdd\}}tj	 
|
|¡}t ||||j|j¡}n| |¡}t d
||¡}|||	d fS )NFr]   r   zforward+wgradrL   r£   r$   )re   r‘   zbsi,bso->oi)Úsaved_tensorsr(   Úneeds_input_gradr/   r&   r<   r\   r%   r   rx   r¤   rf   re   Úmatmulr-   r–   Úint32r¥   )r©   Úgrad_outputr   rª   r«   r(   rF   Ú
grad_inputÚgrad_weightÚ	grad_biasÚgrad_output8rc   r­   rd   Úgrad_weight8r¬   ÚS3Úgrad_input8r
   r
   r   Úbackwardï   sJ   


ÿÿ

ÿÿ
ÿÿ
zLinearFunction.backward)Fr=   )rL   )F)rŒ   )NN)r   r   r   ÚstaticmethodrK   r\   rf   rl   rm   rn   r@   r…   r‡   r‹   r¡   r    r¼   r
   r
   r
   r   r<   2   s.    







r<   c                       s&   e Zd Zd‡ fdd„	Zdd„ Z‡  ZS )Ú
Linear8bitTNc                    s†   t ƒ  ¡  || _|| _|| _t t ||¡¡| _	|r$t t |¡¡| _
n|  dd ¡ tjj | j	¡ | j
d urAtjj | j
¡ d S d S )Nr«   )r   r   Úinput_featuresÚoutput_featuresr(   r   Ú	Parameterr-   Úemptyrª   r«   Úregister_parameterÚinitÚxavier_uniform_Úzeros_)r   r¿   rÀ   r«   r(   r   r
   r   r   #  s   

ÿzLinear8bit.__init__c                 C   s    | j | j_ t || j| j| j¡S r   )Útrainingr(   r<   Úapplyrª   r«   r   r
   r
   r   r    3  s   
zLinear8bit.forward)TNr!   r
   r
   r   r   r¾   "  s    r¾   r   r=   c                 C   s   g | ]}d |› ‘qS )Ú
threshold_r
   )Ú.0Úvalsr
   r
   r   Ú
<listcomp>;  s    rÌ   r   )Úidsc                 C   sŽ   t jjdd| d ¡  ¡ }|jjjdksJ ‚|jjt	j
ksJ ‚| ¡  tdƒD ]}t	jddddd ¡ }||ƒ}|d	krD|jjd usDJ ‚q'd S )
Né    é@   )r   Úcudaéd   é   rŒ   r‰   rL   )r   r   r   rÐ   r~   rª   rz   Útypere   r-   Úfloat16ÚevalÚrangeÚrandnÚstateÚCxB)r   Úl1ÚiÚb1Úo1r
   r
   r   Útest_linear8bitlt_inference>  s   €ürÞ   c                  C   sz  t jjdd„ tdƒD ƒŽ } t jjdd„ tdƒD ƒŽ }| d jj |d jj¡ | d jj |d jj¡ | d jj |d jj¡ | d jj |d jj¡ tj	j
|  ¡ dd}tj	j
| ¡ dd}d	}td	ƒD ]Ò}t jd
dddd ¡ }| |ƒ}||ƒ}| ¡ }	| ¡ }
|	 ¡  |
 ¡  |dkr¥| d jjd us›J ‚| d jjd us¥J ‚|dkr|| dkr| ¡  | d¡ | ¡  | d¡ t| d j|d jdddd t| d j|d jdddd | d jj |d jj¡ | d jj |d jj¡ | d jj |d jj¡ | d jj |d jj¡ qht jj| d jj|d jjddd t jj| d jj|d jjddd qhd S )Nc                 S   ó"   g | ]}t j d d ¡ ¡  ¡ ‘qS ©rÎ   )r   r   r   rÐ   r~   ©rÊ   rÛ   r
   r
   r   rÌ   M  ó   " z:test_linear8bitlt_accumulated_gradient.<locals>.<listcomp>r]   c                 S   rß   rà   )r-   r   ÚLinearrÐ   r~   rá   r
   r
   r   rÌ   N  râ   r   rL   gü©ñÒMbP?)Úlrr,   rÒ   rŒ   rÎ   rÐ   r‰   TgÍÌÌÌÌÌð?ç{®Gáz„?)r7   r6   r8   ©r6   r7   )r-   r   Ú
SequentialrÖ   rª   r|   r   r«   r   ÚoptimÚ	Adam32bitÚ
parametersr×   r~   Úmeanr¼   rØ   rÙ   ÚstepÚ	zero_gradr;   r2   r3   Úgrad)rÚ   Úl2Úopt1Úopt2Ú	acc_stepsrÛ   rÜ   rÝ   Úo2Úloss1Úloss2r
   r
   r   Ú&test_linear8bitlt_accumulated_gradientL  sL   

ÿÿ$&ârö   r>   r   Fc                 C   sz  t jjdd| d|d ¡  ¡ }|jjtjksJ ‚| 	¡  t
dƒD ]}tjddddd	 ¡ }||ƒ}|jtjks9J ‚q tdd| dd
 ¡ }|jjjtjksNJ ‚|jjjtjksXJ ‚t
dƒD ]3}tjddddd	 ¡ }||ƒ}|jtjksuJ ‚| dkr‚|jjjd us‚J ‚| dkr|jjjd usJ ‚q\tdd| dd
 ¡  ¡ }|jjjtjks¦J ‚|jjjtjks°J ‚t
dƒD ]3}tjddddd	 ¡ }||ƒ}|jtjksÍJ ‚| dkrÚ|jjjd usÚJ ‚| dkrç|jjjd usçJ ‚q´tdd| dd
 ¡  ¡ }t
dƒD ]8}tjddddd	 ¡ }||ƒ}|jtjksJ ‚| dkr!|jjjd us!J ‚| dkr0|jjjd us0J ‚qø|jjjtjks<J ‚|jjjtjksGJ ‚tdd| d|d ¡  d¡}t
dƒD ]9}tjddddd	 ¡ }||ƒ}|jtjkssJ ‚| dkr‚|jjjd us‚J ‚| dkr‘|jjjd us‘J ‚qY|jjjtjksžJ ‚|jjjtjks©J ‚|jjjjdks´J ‚|jjjjdks¿J ‚tdd| d|d}|jj ¡  ¡ |jj ¡  ¡ }}| ¡  ¡ }t
dƒD ]9}tjddddd	 ¡ }||ƒ}|jtjksýJ ‚| dkr|jjjd usJ ‚| dkr|jjjd usJ ‚qã|jjjtjks(J ‚|jjjtjks3J ‚|jjjjdks>J ‚|jjjjdksIJ ‚|r¹tjdddddtjd}||ƒ}|jtjkseJ ‚|jskJ ‚t |¡}	| ¡  ||	  ¡  ¡  |	 d¡| ¡  | ¡  }
|
 ¡  ¡ }tjj|j |
dd| d tj!|j |
d| dd}|dk ¡  "¡ | #¡ d ks»J ‚d S d S )NrÎ   rÏ   F)r   r   r   rÑ   rÒ   rŒ   rÐ   r‰   )r   r   r   T)rz   Úrequires_gradre   r]   çš™™™™™©?)r7   r6   rå   gš™™™™™¹?ræ   g{®Gázt?)$r   r   r   rÐ   r~   rª   re   r-   rU   rÕ   rÖ   r×   rÔ   r   r   r   rØ   r9   rT   rz   rÓ   Úcloner÷   Ú
randn_likerí   r/   r¼   r”   rQ   rë   r2   r3   rî   r.   r0   Únumel)r   r   rÚ   rÛ   rÜ   rÝ   ÚmlpÚw1Úw2Ú	grad_projÚgrad_refr[   r9   r
   r
   r   Ú!test_linear8bitlt_no_fp16_weightsz  sÀ   €ý€ý

€

€
ÿ"

€
"òr  ÚmoduleTc                 C   s   t jj| ||ddS )NF)r«   r   )r   r   r   )ÚninÚnoutr«   r
   r
   r   Ú<lambda>é  ó    r  ÚInt8LtÚFP4c                 C   sì   | ddƒ  ¡ }|jjtjtjfv sJ ‚|jjtjksJ ‚tdƒD ]}tj	ddddd 
¡ }||ƒ}|jjtjks:J ‚q | dddd	  ¡ }|jjtjtjfv sPJ ‚|jd u sWJ ‚tdƒD ]}tj	ddddd 
¡ }||ƒ}|jd u ssJ ‚q[d S )
NrÎ   rÏ   rÑ   rÒ   rŒ   rÐ   r‰   F)r«   )rÐ   rª   re   r-   rU   Úuint8r«   Úfloat32rÖ   r×   r~   rÔ   )r  rÚ   rÛ   rÜ   rÝ   r
   r
   r   Útest_linear_kbit_fp32_biasé  s   ýr  c                 C   ó   t jj| |ddS ©NT)Úcompress_statistics)r   r   Ú	LinearFP4©Úd1Úd2r
   r
   r   r    ó    c                 C   r  r  )r   r   Ú	LinearNF4r  r
   r
   r   r    r  c                 C   ó   t jj| |tjdS ©N©Úcompute_dtype)r   r   r  r-   r
  r  r
   r
   r   r    r  c                 C   r  r  )r   r   r  r-   rÔ   r  r
   r
   r   r    r  c                 C   r  r  )r   r   r  r-   Úbfloat16r  r
   r
   r   r  	  r  )	r  Ú4bitr  ÚNF4zFP4+CzNF4+CzNF4+fp32zNF4+fp16zNF4+bf16zthis test requires a GPU)Úreasonc                 C   sR  d}d}d}t jtj  ||¡tj  |d¡gŽ }d|d j_tj j |d j¡ tj j |d j¡ t jtj  ||¡| |dƒgŽ }|d j ¡  	|d j¡ |d j ¡  	|d j¡ |d j
 ¡  	|d j
¡ |d j
 ¡  	|d j
¡ | ¡  ¡ }| ¡  ¡ }| ¡  d¡}g }g }g }g }	td	ƒD ]ç}
t ||¡ ¡  ¡ }||ƒ}||ƒ}| ¡  ¡  | ¡  ¡  |d jj}|d jj}|d j
j}|d j
j}||  ¡  ¡ }||  ¡  ¡ }|| ¡  ¡ d
  }|| ¡  ¡ d
  }| | ¡  ¡ ¡ | | ¡  ¡ ¡ | | ¡  ¡ ¡ |	 | ¡  ¡ ¡ t| tj jƒr/t||dddd tjj||ddd nt||dddd tjj||ddd | ¡  | ¡  |d jjd u sb|d jj ¡  ¡ dksbJ ‚|d jjd u sz|d j
j ¡  ¡ dkszJ ‚q“tdt|ƒt |ƒ ƒ tdt|ƒt |ƒ ƒ tdt|ƒt |ƒ ƒ tdt|	ƒt |	ƒ ƒ d S )Né   é%   éS   r,   FrL   r   rÐ   rÑ   g•Ö&è.>gü©ñÒMb€?rø   )r6   r7   r8   ræ   g¸…ëQ¸Ž?g{®Gáz”?r„   rî   zrel outzrel grad)!r   rç   r-   rã   rª   r÷   rÄ   Úkaiming_normal_Údetachr   r«   r~   rÐ   rT   rÖ   r×   rë   r¼   rî   rQ   rS   Úappendr0   Ú
isinstancer   r   r;   r2   r3   rí   r/   r1   r_   )r  r5   r   r   ÚrefÚkbitÚerrs1Úerrs2Úrelerrs1Úrelerrs2rÛ   ÚbatchÚout1Úout2Úgrad1Úgrad2Úbgrad1Úbgrad2Úerr1Úerr2Úrelerr1Úrelerr2r
   r
   r   Útest_kbit_backprop  sf   "00€r5  c                  C   sh  d} d}t  | |¡ ¡ }t j ||d ¡ ¡ }tjj ||d ¡ ¡ }t j |d |¡ ¡ }tjj |d |¡ ¡ }|jj	 
|jj	¡ |jj	 
|jj	¡ |jj	 
|jj	¡ |jj	 
|jj	¡ |t jj ||ƒ¡ƒ}|t jj ||ƒ¡ƒ} ||   ¡  ¡ }| ¡  ¡  |  ¡  ¡  |jj|jj  ¡  ¡ }	|jj|jj  ¡  ¡ }
|dk s¦J ‚|	dk s¬J ‚|
dk s²J ‚d S )Nr,   rˆ   r]   rø   gñhãˆµøô>)r-   r×   rÐ   r   rã   r   ÚresearchÚLinearFP8Mixedrª   r|   r   r«   rx   ÚgelurQ   rë   r¼   rî   )r5   ÚhÚinpÚfp32Úfp8Úfp32bÚfp8br4   ÚerrÚgraderrÚbgraderrr
   r
   r   Útest_fp8linearH  s*   rB  c                     sž  d‰ t jtdd( tj‡ fdd„tdƒD ƒŽ } |  ¡ } t dˆ ¡ ¡  	¡ }| |ƒ W d   ƒ n1 s4w   Y  t jtdd( tj‡ fdd„tdƒD ƒŽ } |  ¡ } t d	ˆ ¡ ¡  	¡ }| |ƒ W d   ƒ n1 skw   Y  t  t¡H}tj‡ fd
d„tdƒD ƒŽ } |  ¡ } t dˆ ¡ ¡  	¡ }| |ƒ tj‡ fdd„tdƒD ƒŽ } |  ¡ } t d	ˆ ¡ ¡  	¡ }| |ƒ W d   ƒ n1 sÀw   Y  t
|ƒdksÍJ ‚d S )NrÏ   zinference or training)Úmatchc                    ó    g | ]}t jjˆ ˆ tjd ‘qS ©r  ©r   r   Ú
Linear4bitr-   r
  rá   ©r   r
   r   rÌ   j  ó     z&test_4bit_warnings.<locals>.<listcomp>r,   z
inference.c                    rD  rE  rF  rá   rH  r
   r   rÌ   o  rI  rL   c                    rD  rE  rF  rá   rH  r
   r   rÌ   v  rI  c                    rD  rE  rF  rá   rH  r
   r   rÌ   {  rI  r]   )ÚpytestÚwarnsÚUserWarningr   rç   rÖ   rÐ   r-   rŠ   r~   r_   )Únetr:  Úrecordr
   rH  r   Útest_4bit_warningsf  s0   
ü
ü
örO  )r*   r+   r,   )T)%Ú	itertoolsr   rJ  r-   r   Úbitsandbytesr   r   ÚModuler   r)   r;   ÚautogradÚFunctionr<   r¾   r   rŸ   ÚnamesÚmarkÚparametrizerÞ   rö   r  r  r  Úmodulesr"  r   rG  r  ÚskipifrÐ   Úis_availabler5  rB  rO  r
   r
   r
   r   Ú<module>   sN    
 q
."m
;