o
    h	                     @   s:  d dl Z d dlZd dlZd dlmZ e sdejfddZdS d dlZd dlm	Z
 d dlmZmZ ejeji ddd	eji d
dd	eji ddd	eji ddd	eji ddeji d
deji ddeji ddeji ddeji d
deji ddeji ddgdgdejde
jde
jfddZdejfddZdS )    N)is_triton_availablexc                 C   s   d S )N r   r   r   Z/var/www/html/ai/venv/lib/python3.10/site-packages/bitsandbytes/triton/quantize_rowwise.pyquantize_rowwise   s    r   )early_config_pruneestimate_matmul_time      )
num_stages	num_warps      )r   )r   
n_elements)configskey
BLOCK_SIZEP2c                 C   s   t jdd}|| }t d|}|| }	||k }
t j| |	 |
d}t |}t jt |
|ddd}t jd||  }t j	||	 ||
d t 	|| | d S )Nr   )axis)maskg     _@)
tl
program_idarangeloadabsmaxwhere	libdevicellrintstore)x_ptr
output_ptroutput_maxsr   r   r   pidblock_startr   offsetsrow_maskr   abs_xmax_valoutputr   r   r   _quantize_rowwise   s   
r+   c                    s   t j j jt jd}t j jd  jt jd}tdtt	 jd  } j
r.|j
s0J | } fdd}t|  ||| jd |d ||fS )N)devicedtyper   r   r
   c                    s    j d fS )Nr   )shape)metar   r   r   <lambda>A   s    z"quantize_rowwise.<locals>.<lambda>)r   r   )torchemptyr.   r,   int8float16intmathceillog2is_cudanumelr+   )r   r*   r#   r   r   gridr   r   r   r   9   s   )r6   r1   time bitsandbytes.triton.triton_utilsr   Tensorr   tritontriton.languagelanguager   triton.ops.matmul_perf_modelr   r	   autotuneConfigjit	constexprr+   r   r   r   r   <module>   s@    