o
    hs&                     @   s  d dl Z d dlmZ d dlZd dlmZ d dlmZ d dlm	Z	 d dl
mZ d dlmZ d dlmZ d dlmZmZ d d	lmZ G d
d de jjZG dd de jjZG dd de jjZG dd dejZeeddZeedddZeeddZG dd de jjZG dd dejZ dS )    N)partial)is_triton_available)dequantize_rowwise)quantize_rowwise)!quantize_columnwise_and_transpose)int8_matmul_rowwise_dequantize)quantize_globalquantize_global_transpose)int8_matmul_mixed_dequanitzec                   @   $   e Zd Zedd Zedd ZdS )_switchback_globalc           	      C   sd   | d|d}t|\}}t|\}}||f| _t|| |||j g | d d dR  S Nviewsizer   r   save_for_backwardr
   t	ctxX_3DWbiasXX_int8state_XW_int8state_W r   Z/var/www/html/ai/venv/lib/python3.10/site-packages/bitsandbytes/nn/triton_based_modules.pyforward   s   
z_switchback_global.forwardc                 C   s   | d|d}d  } }}| j\}}| jd r=t|\}}	t|\}
}t||
 |	|d jg | d d dR  }| jd rNt	
| ||j}| jd rY|jdd}|||fS Nr   r         dim)reshaper   r   needs_input_gradr   r	   r
   r   r   torchmatmultodtypesum)r   G_3DGgrad_Xgrad_W	grad_biasr   r   G_int8state_Gr   r   r   r   r   backward$   s    




z_switchback_global.backwardN__name__
__module____qualname__staticmethodr    r4   r   r   r   r   r      s
    
r   c                   @   r   )_switchback_vectorrizec           	      C   sd   | d|d}||f| _t|\}}t|\}}t|| |||j g | d d dR  S r   )r   r   r   r   r   r   r   r   r   r   r    ?   s   
z_switchback_vectorrize.forwardc                 C   s   | j \}}|d|d}d  } }}| jd r=t|\}}	t|\}
}t||
 |	|d jg | d d dR  }| jd rNt	
| ||j}| jd rY|jdd}|||fS r!   )r   r&   r   r'   r   r   r   r   r   r(   r)   r*   r+   r,   )r   r-   r   r   r.   r/   r0   r1   r2   r3   r   r   r   r   r   r4   P   s    




z_switchback_vectorrize.backwardNr5   r   r   r   r   r:   =   s
    
r:   c                   @   r   ) _switchback_global_mem_efficientc           
      C   sn   | d|d}| }t|\}}~t|\}}	||||	f| _t|| ||	|j g |d d dR  S r   r   )
r   r   r   r   r   X_3D_szr   r   r   r   r   r   r   r    j   s   
z(_switchback_global_mem_efficient.forwardc                 C   s   | d|d}| }d  } }}| j\}}}	}
| jd r2t||}~t| ||j	}~| jd r=|j
dd}| jd ret|\}}~|	  }	t||	 ||
d jg |d d dR  }|||fS )Nr   r"   r#   r   r$   )r&   r   r   r'   r   r(   r)   r   r*   r+   r,   r   
contiguousr
   r   )r   r-   r.   G_3D_szr/   r0   r1   r   r   r   r   real_Xr2   r3   r   r   r   r4   ~   s*   





z)_switchback_global_mem_efficient.backwardNr5   r   r   r   r   r;   h   s
    
r;   c                       sN   e Zd Z					ddededededef
 fd	d
Zdd Zdd Z  ZS )SwitchBackLinearTNFin_featuresout_featuresr   vector_wise_quantizationmem_efficientc                    sf   t  ||||| tstd|| _| jr't| _|r%td td d S d S |r.t	| _d S t
| _d S )NzCould not import triton. Please install triton to use SwitchBackLinear.
                               Alternatively, you can use bnb.nn.SwitchBackLinearBnb, but it will be slowerz<mem efficient is not supported for vector-wise quantization.r"   )super__init__r   ImportErrorrC   r:   _fnprintexitr;   r   )selfrA   rB   r   devicer+   rC   rD   	__class__r   r   rF      s   


zSwitchBackLinear.__init__c                 C   sL   t d | jrt| j\}}nt| j\}}| d| | d| | `d S )Nz=> preparing for eval.r   r   )rI   rC   r   weightr   register_buffer)rK   r   r   r   r   r   prepare_for_eval   s   z!SwitchBackLinear.prepare_for_evalc                 C   s   | j r| j|| j| jS t| ds| j|| j| jS |d|d}t|\}}| j	rIt
|| j || j| jjg | d d dR  S t|| j || j| jjg | d d dR  S )Nr   r   )trainingrH   applyrO   r   hasattrr   r   r   rC   r   r   r   r   r
   )rK   xr   r   r   r   r   r   r       s2   
zSwitchBackLinear.forward)TNNFF)	r6   r7   r8   intboolrF   rQ   r    __classcell__r   r   rM   r   r@      s&    r@   F)rC   T)rC   rD   c                   @   s&   e Zd ZedddZedd ZdS )StandardLinearFunctionNc                 C   sj   | d|d}| ||| || }|d ur%||d|7 }|j g | d d dR  S )Nr   r   )r   r   r   r)   r   	unsqueeze	expand_as)r   inputrO   r   r   outputr   r   r   r       s    zStandardLinearFunction.forwardc           	      C   s   | j \}}}|d|d}d  } }}| jd r1|||jjg | d d dR  }| jd rA| ||j}|d urO| jd rO|	d}|||fS )Nr   r   r"   r#   )
saved_tensorsr&   r   r'   r)   r*   r+   r   r   r,   )	r   grad_output_3Dr\   rO   r   grad_output
grad_inputgrad_weightr1   r   r   r   r4      s   
.


zStandardLinearFunction.backwardNr5   r   r   r   r   rY      s
    	rY   c                   @   s   e Zd Zdd ZdS )StandardLinearc                 C   s   t || j| jS rc   )rY   rS   rO   r   )rK   rU   r   r   r   r      s   zStandardLinear.forwardN)r6   r7   r8   r    r   r   r   r   rd      s    rd   )!r(   torch.nnnntime	functoolsr    bitsandbytes.triton.triton_utilsr   &bitsandbytes.triton.dequantize_rowwiser   $bitsandbytes.triton.quantize_rowwiser   5bitsandbytes.triton.quantize_columnwise_and_transposer   2bitsandbytes.triton.int8_matmul_rowwise_dequantizer   #bitsandbytes.triton.quantize_globalr   r	   0bitsandbytes.triton.int8_matmul_mixed_dequanitzer
   autogradFunctionr   r:   r;   Linearr@   SwitchBackLinearGlobal"SwitchBackLinearGlobalMemEfficientSwitchBackLinearVectorwiserY   rd   r   r   r   r   <module>   s(    -+0F