o
    hR                     @   sj  d dl mZmZmZmZmZmZ d dlZd dlZd dl	m
  mZ d dlmZmZmZm
Z
 d dlZd dlmZ d dlmZmZ d dlmZ d dlmZmZ edd	d
ZG dd dej
jZG dd dej
jZG dd dej
jZ G dd de
j!Z"G dd de"Z#G dd de"Z$G dd dej
jZ%dd Z&G dd de
j!Z'G dd de
j!Z(G dd  d e
j!Z)dS )!    )AnyDictOptionalTypeVarUnionoverloadN)Tensordevicedtypenn)
QuantState)undo_layoutget_tile_inds)GlobalOptimManager)OutlierTracerfind_outlier_dimsTztorch.nn.Module)boundc                       s   e Zd Z								ddededee dee ded	ed
edee ddf fddZdddZ		 dddZ
dedefddZ  ZS )StableEmbeddingN       @Fnum_embeddingsembedding_dimpadding_idxmax_norm	norm_typescale_grad_by_freqsparse_weightreturnc                    sJ   t  |||||||||	|

 tjj||	d| _t | dddi d S N)r	   weight
optim_bits    )	super__init__torchr   	LayerNormnormr   get_instanceregister_module_override)selfr   r   r   r   r   r   r   r   r	   r
   	__class__ M/var/www/html/ai/venv/lib/python3.10/site-packages/bitsandbytes/nn/modules.pyr$      s    
zStableEmbedding.__init__c                 C      t jj| j |   d S Nr%   r   initxavier_uniform_r    _fill_padding_idx_with_zeror*   r-   r-   r.   reset_parameters4      z StableEmbedding.reset_parametersc                 C   N   | j d ur%t  | j| j  d W d    d S 1 sw   Y  d S d S Nr   r   r%   no_gradr    fill_r5   r-   r-   r.   r4   ?   
   

"z+StableEmbedding._fill_padding_idx_with_zeroinputc              	   C   sD   t || j| j| j| j| j| j}|t	
 }| || jjS r0   )F	embeddingr    r   r   r   r   r   tor%   get_default_dtyper'   r
   r*   r>   embr-   r-   r.   forwardD   s   zStableEmbedding.forward)NNr   FFNNNr   N)__name__
__module____qualname__intr   floatboolr   r$   r6   r4   rE   __classcell__r-   r-   r+   r.   r      s@    	

r   c                       s   e Zd Z							ddededee dee ded	ed
edee dee ddf fddZ	dddZ
	 dddZdedefddZ  ZS )	EmbeddingNr   Fr   r   r   r   r   r   r   r   r	   r   c
           
         s8   t  j|||||||||	d	 t | dddi d S r   )r#   r$   r   r(   r)   )
r*   r   r   r   r   r   r   r   r   r	   r+   r-   r.   r$   V   s   
zEmbedding.__init__c                 C   r/   r0   r1   r5   r-   r-   r.   r6   q   r7   zEmbedding.reset_parametersc                 C   r8   r9   r:   r5   r-   r-   r.   r4   |   r=   z%Embedding._fill_padding_idx_with_zeror>   c              	   C   s&   t || j| j| j| j| j| j}|S r0   )r?   r@   r    r   r   r   r   r   rC   r-   r-   r.   rE      s   
zEmbedding.forward)NNr   FFNNrF   )rG   rH   rI   rJ   r   rK   rL   r   r	   r$   r6   r4   rE   rM   r-   r-   r+   r.   rN   U   sB    	


rN   c                       s   e Zd Zd deej dededed	e	d
d fddZ
ed!dejdee	ef ded
d fddZdd Zed"dedeeeef  deeee	f  ded
ef
ddZed#dedeee	f ded
efddZed#dededed
efddZ fddZ  ZS )$
Params4bitNT@   fp4dataquant_state	blocksizecompress_statistics
quant_typer   c                 C   sD   |d u r	t d}t j| ||}||_||_||_||_||_|S r9   )	r%   emptyr   _make_subclassrT   rU   rV   rS   rR   )clsrR   requires_gradrS   rT   rU   rV   r*   r-   r-   r.   __new__   s   
zParams4bit.__new__Fcudaquantized_statsrZ   c                 K   sL   t j| ||}||_tj||d|_|jj|_|jj	|_
|jj|_|S )N)qs_dictr	   )r%   r   rX   rA   rZ   r   	from_dictrS   rT   nestedrU   rV   )rY   rR   r]   rZ   r	   kwargsr*   r-   r-   r.   from_prequantized   s   


zParams4bit.from_prequantizedc                 C   sP   | j d ur| S | j  |}tjj|| j| j	| j
d\}}|| _|| _ | S )N)rT   rU   rV   )rS   rR   
contiguoushalfr\   bnb
functionalquantize_4bitrT   rU   rV   )r*   r	   ww_4bitrS   r-   r-   r.   r\      s   
zParams4bit.cuda.r*   r	   r
   non_blockingc                 C      d S r0   r-   r*   r	   r
   rj   r-   r-   r.   rA         zParams4bit.toc                 C   rk   r0   r-   r*   r
   rj   r-   r-   r.   rA      rm   tensorc                 C   rk   r0   r-   r*   ro   rj   r-   r-   r.   rA      rm   c                    s   t jjj|i |\}}}}|d ur#|jdkr#| jjjdkr#| |S | jd ur.| j	| t
t j	|||d| j| j| j| j| jd}|S )Nr\   cpur	   r
   rj   )rZ   rS   rT   rU   rV   )r%   _C_nn	_parse_totyperR   r	   r\   rS   rA   rO   r#   rZ   rT   rU   rV   r*   argsra   r	   r
   rj   convert_to_format	new_paramr+   r-   r.   rA      s    

)NTNrP   TrQ   )Fr\   ....)rG   rH   rI   r   r%   r   r   rJ   rL   strr[   classmethodr   r   rb   r\   r   r   r   r	   r
   rA   rM   r-   r-   r+   r.   rO      s    *(	
:&rO   c                       sB   e Zd Zd fdd	Zdd Z fdd	Zd
ejfddZ  Z	S )
Linear4bitTNrQ   c                    s8   t  |||| t| jjd||d| _|| _d| _d S )NF)rZ   rU   rV   )r#   r$   rO   r    rR   compute_dtypecompute_type_is_set)r*   input_featuresoutput_featuresbiasr   rU   rV   r	   r+   r-   r.   r$      s   
zLinear4bit.__init__c                 C   s   |j tjtjfv r|j | _d S |j tjkrM| jtjkr0| |jd kr0t	d tj
ddd | jtjkrO| |jd krQt	d tj
ddd d S d S d S d S )NzInput type into Linear4bit is torch.float16, but bnb_4bit_compute_type=torch.float32 (default). This will lead to slow inference.ignorez.*inference.)messagezInput type into Linear4bit is torch.float16, but bnb_4bit_compute_type=torch.float32 (default). This will lead to slow inference or training speed.z.*inference or training)r
   r%   float32bfloat16r   float16numelshapewarningswarnfilterwarnings)r*   xr-   r-   r.   set_compute_type   s   

zLinear4bit.set_compute_typec                    sd   t  ||| t| jdddur.| jjjdd D ]\}}|r#|n| ||d | < qdS dS )zc
        save weight and bias,
        then fill state_dict with components of quant_state
        rS   NT)packedzweight.)r#   _save_to_state_dictgetattrr    rS   as_dictitemsdetach)r*   destinationprefix	keep_varskvr+   r-   r.   r      s   zLinear4bit._save_to_state_dictr   c                 C   s   | j d ur| j j|jkr| j j|j| j _t| jdd d u r#td | js.| | d| _|j}| j	d ur<|| j	}| j d u rCd n| j | j	}t
j|| j || jjd}||}|S )NrS   zhFP4 quantization state not initialized. Please call .cuda() or .to(device) on the LinearFP4 layer first.T)r   rS   )r   r
   rR   rA   r   r    printr   r   r   re   matmul_4bittrS   )r*   r   	inp_dtyper   outr-   r-   r.   rE      s   


zLinear4bit.forward)TNTrQ   N)
rG   rH   rI   r$   r   r   r%   r   rE   rM   r-   r-   r+   r.   r      s
    r   c                       s   e Zd Zd fdd	Z  ZS )	LinearFP4TNc              	         t  |||||d| d S )NrQ   r#   r$   r*   r   r   r   r   rU   r	   r+   r-   r.   r$   
     zLinearFP4.__init__TNTN)rG   rH   rI   r$   rM   r-   r-   r+   r.   r   	  s    r   c                       s"   e Zd ZdZd fdd	Z  ZS )	LinearNF4a7   Implements the NF4 data type.

        Constructs a quantization data type where each bin has equal area under a standard normal distribution N(0, 1) that
        is normalized into the range [-1, 1].

        For more information read the paper: QLoRA: Efficient Finetuning of Quantized LLMs (https://arxiv.org/abs/2305.14314)

        Implementation of the NF4 data type in bitsandbytes can be found in the `create_normal_map` function in
        the `functional.py` file: https://github.com/TimDettmers/bitsandbytes/blob/main/bitsandbytes/functional.py#L236.
    TNc              	      r   )Nnf4r   r   r+   r-   r.   r$     r   zLinearNF4.__init__r   )rG   rH   rI   __doc__r$   rM   r-   r-   r+   r.   r     s    
r   c                       s   e Zd Z					dddZ fddZe			dd	ed
eee	e
f  deeeef  dedef
ddZedd	edeeef dedefddZedd	edededefddZ fddZ  ZS )
Int8ParamsNTFc                 C   s4   || _ d | _d | _|d u rtd}tj| ||S r9   )has_fp16_weightsCBSCBr%   rW   r   rX   )rY   rR   rZ   r   r   r   r-   r-   r.   r[     s   
zInt8Params.__new__c                    sb   | j r	t |S | j  |}tj|\}}}}}~~|| _t	| d| t	| d| | S )Nr   r   )
r   r#   r\   rR   rc   rd   re   rf   double_quantsetattr)r*   r	   Br   CBtr   SCBtcoo_tensorBr+   r-   r.   r\   -  s   zInt8Params.cuda.r*   r	   r
   rj   r   c                 C   rk   r0   r-   rl   r-   r-   r.   rA   =  s   zInt8Params.toc                 C   rk   r0   r-   rn   r-   r-   r.   rA   F  rm   ro   c                 C   rk   r0   r-   rp   r-   r-   r.   rA   J  rm   c                    sz   t jjj|i |\}}}}|d ur#|jdkr#| jjjdkr#| |S tt	 j
|||d| j| jd}| j|_| j|_|S )Nr\   rq   rr   )rZ   r   )r%   rs   rt   ru   rv   rR   r	   r\   r   r#   rA   rZ   r   r   r   rw   r+   r-   r.   rA   N  s&   

)NTFNNr{   r|   )rG   rH   rI   r[   r\   r   r   r   r   rJ   r	   r
   r}   rL   rA   r   rM   r-   r-   r+   r.   r     s8    
&r   c           
      C   s^   |  | d}|d u rd S | | dd}|dkr-t||j}	t||	| | d< d S d S )Nr    weight_formatrow)getpopr   r	   r   )

state_dictr   local_metadatastrictmissing_keysunexpected_keys
error_msgsr    r   tile_indicesr-   r-   r.   maybe_rearrange_weightg  s   r   c                       sR   e Zd Z		d fdd	Z fddZ fd	d
Zdd ZdejfddZ	  Z
S )Linear8bitLtTF        Nc	           	         s|   t  |||| |rJ dt | _|| _|| j_|| j_|| j_|dkr-|s-d| j_	t
| jj||d| _| t d S )Nzvmemory_efficient_backward is no longer required and the argument is deprecated in 0.37.0 and will be removed in 0.39.0r   Tr   rZ   )r#   r$   re   MatmulLtStatestateindex	thresholdr   memory_efficient_backwarduse_poolr   r    rR   "_register_load_state_dict_pre_hookr   	r*   r   r   r   r   r   r   r   r	   r+   r-   r.   r$   t  s   
zLinear8bitLt.__init__c           
         s   t  ||| d}t| j|}t| j|}| jjd u}||  }|d }	| jjsi|d ur=|r1|n| ||< d||	< d S |d urS|sS|rG|n| ||< d||	< d S |d urk|r[|n| ||< | jj||	< d S d S d S )Nr   r   r   )	r#   r   r   r    r   CxBr   r   formatB)
r*   r   r   r   scb_nameparam_from_weightparam_from_statelayout_reorderedkey_nameformat_namer+   r-   r.   r     s&   
z Linear8bitLt._save_to_state_dictc              	      s   t  ||||||| t|}|D ]4}	|	t|d  }
|
dkrF| jjd u r*td||	 }| jj| | jjd urA| jj| j_|	|	 qd S )Nr   zLoading a quantized checkpoint into non-quantized Linear8bitLt is not supported. Please call module.cuda() before module.load_state_dict())
r#   _load_from_state_dictlistlenr    r   RuntimeErrorcopy_r   remove)r*   r   r   r   r   r   r   r   unexpected_copykey
input_nameinput_paramr+   r-   r.   r     s    
z"Linear8bitLt._load_from_state_dictc                 C   ,   | j j| j_| j j| j_d | j _d | j _d S r0   r    r   r   r   r5   r-   r-   r.   init_8bit_state     zLinear8bitLt.init_8bit_stater   c                 C   s   | j | j_| jjd ur|   | jd ur%| jj|jkr%| jj	|j| j_t
j|| j| j| jd}| jjsJ| jjd urJ| jjd urJ| j`| jj| j_|S N)r   r   )trainingr   is_trainingr    r   r   r   r
   rR   rA   re   matmulr   r   r*   r   r   r-   r-   r.   rE     s   
zLinear8bitLt.forwardTTFr   NN)rG   rH   rI   r$   r   r   r   r%   r   rE   rM   r-   r-   r+   r.   r   s  s    r   c                       s6   e Zd Zd fdd	Zdd Zdd Zd	d
 Z  ZS )OutlierAwareLinearTNc                    s"   t  |||| d | _d| _d S )NF)r#   r$   outlier_dimis_quantized)r*   r   r   r   r	   r+   r-   r.   r$     s   
zOutlierAwareLinear.__init__c                 C      t d)NzJPlease override the `forward_with_outliers(self, x, outlier_idx)` functionNotImplementedError)r*   r   outlier_idxr-   r-   r.   forward_with_outliers     z(OutlierAwareLinear.forward_with_outliersc                 C   r   )NzEPlease override the `quantize_weights(self, w, outlier_idx)` functionr   )r*   rh   r   r-   r-   r.   quantize_weight  r   z"OutlierAwareLinear.quantize_weightc                 C   sf   | j d u rt }| std || j}|| _ | js1| | j| j }| jj	
| d| _d S d S )NzTPlease use OutlierTracer.initialize(model) before using the OutlierAwareLinear layerT)r   r   r(   is_initializedr   get_outliersr    r   r   rR   r   )r*   r   tracerr   rh   r-   r-   r.   rE     s   

zOutlierAwareLinear.forward)TN)rG   rH   rI   r$   r   r   rE   rM   r-   r-   r+   r.   r     s
    r   c                       s:   e Zd Z						d fdd	Zdd Zd	d
 Z  ZS )SwitchBackLinearBnbTFr   Nc	           	         sf   t  |||| t | _|| _|| j_|| j_|| j_|dkr'|s'd| j_	t
| jj||d| _d S )Nr   Tr   )r#   r$   re   r   r   r   r   r   r   r   r   r    rR   r   r+   r-   r.   r$     s   

zSwitchBackLinearBnb.__init__c                 C   r   r0   r   r5   r-   r-   r.   r     r   z#SwitchBackLinearBnb.init_8bit_statec                 C   sF   | j | j_| jjd ur|   tj| | j d | jd| j	 }d S r   )
r   r   r   r    r   r   re   matmul_mixedrd   r   r   r-   r-   r.   rE     s   
(zSwitchBackLinearBnb.forwardr   )rG   rH   rI   r$   r   rE   rM   r-   r-   r+   r.   r     s    r   )*typingr   r   r   r   r   r   r   r%   torch.nn.functionalr   rf   r?   r   r	   r
   bitsandbytesre   bitsandbytes.functionalr    bitsandbytes.autograd._functionsr   r   bitsandbytes.optimr   bitsandbytes.utilsr   r   r   rN   r   	ParameterrO   Linearr   r   r   r   r   r   r   r   r-   r-   r-   r.   <module>   s,    @:?;J\