o
    h                     @   sF  d dl Z d dlZd dlZd dlmZ e s"dejdejfddZdS d dlZd dlm	Z
 d dlmZmZ ejeji dd	d
eji dd	d
eji dd	d
eji d	d	d
eji ddeji ddeji ddeji d	deji ddeji ddeji ddeji d	dgdgdejde
jde
jfddZdejdejfddZdS )    N)is_triton_availablexstate_xc                 C   s   d S )N )r   r   r   r   \/var/www/html/ai/venv/lib/python3.10/site-packages/bitsandbytes/triton/dequantize_rowwise.pydequantize_rowwise   s    r   )early_config_pruneestimate_matmul_time      )
num_stages	num_warps      )r   )r   
n_elements)configskey
BLOCK_SIZEP2c                 C   st   t jdd}|| }t d|}	||	 }
|	|k }t j| |
 |d}t || }|| | }t j||
 ||d d S )Nr   )axis)mask)tl
program_idarangeloadstore)x_ptrr   
output_ptrinv_127r   r   r   pidblock_startr   offsetsrow_maskr   max_valoutputr   r   r   _dequantize_rowwise   s   r%   c              	      s~   t j j jt jd}tdtt jd  } j	r!|j	s#J |
 } fdd}t|  ||d| jd |d |S )N)devicedtyper   r
   c                    s    j d fS )Nr   )shape)metar   r   r   <lambda>>   s    z$dequantize_rowwise.<locals>.<lambda>g@ ?)r   r   )torchemptyr(   r&   float16intmathceillog2is_cudanumelr%   )r   r   r$   r   r   gridr   r*   r   r   7   s    )r0   r,   time bitsandbytes.triton.triton_utilsr   Tensorr   tritontriton.languagelanguager   triton.ops.matmul_perf_modelr   r	   autotuneConfigjit	constexprr%   r   r   r   r   <module>   s@    