import math
import os

import torch
import torch.distributed as dist

import bitsandbytes.functional as F
from bitsandbytes.optim.optimizer import Optimizer2State


class Adam(Optimizer2State):
    def __init__(self, params, lr=1e-3, betas=(0.9, 0.999), eps=1e-8,
                 weight_decay=0, amsgrad=False, optim_bits=32, args=None,
                 min_8bit_size=4096, percentile_clipping=100, block_wise=True,
                 is_paged=False):
        super().__init__("adam", params, lr, betas, eps, weight_decay,
                         optim_bits, args, min_8bit_size, percentile_clipping,
                         block_wise, is_paged=is_paged)


class Adam8bit(Optimizer2State):
    def __init__(self, params, lr=1e-3, betas=(0.9, 0.999), eps=1e-8,
                 weight_decay=0, amsgrad=False, optim_bits=32, args=None,
                 min_8bit_size=4096, percentile_clipping=100, block_wise=True,
                 is_paged=False):
        # Optimizer state is fixed to 8 bits regardless of optim_bits.
        super().__init__("adam", params, lr, betas, eps, weight_decay, 8,
                         args, min_8bit_size, percentile_clipping, block_wise,
                         is_paged=is_paged)


class Adam32bit(Optimizer2State):
    def __init__(self, params, lr=1e-3, betas=(0.9, 0.999), eps=1e-8,
                 weight_decay=0, amsgrad=False, optim_bits=32, args=None,
                 min_8bit_size=4096, percentile_clipping=100, block_wise=True,
                 is_paged=False):
        # Optimizer state is fixed to 32 bits regardless of optim_bits.
        super().__init__("adam", params, lr, betas, eps, weight_decay, 32,
                         args, min_8bit_size, percentile_clipping, block_wise,
                         is_paged=is_paged)


class PagedAdam(Optimizer2State):
    def __init__(self, params, lr=1e-3, betas=(0.9, 0.999), eps=1e-8,
                 weight_decay=0, amsgrad=False, optim_bits=32, args=None,
                 min_8bit_size=4096, percentile_clipping=100, block_wise=True,
                 is_paged=False):
        super().__init__("adam", params, lr, betas, eps, weight_decay,
                         optim_bits, args, min_8bit_size, percentile_clipping,
                         block_wise, is_paged=True)


class PagedAdam8bit(Optimizer2State):
    def __init__(self, params, lr=1e-3, betas=(0.9, 0.999), eps=1e-8,
                 weight_decay=0, amsgrad=False, optim_bits=32, args=None,
                 min_8bit_size=4096, percentile_clipping=100, block_wise=True,
                 is_paged=False):
        super().__init__("adam", params, lr, betas, eps, weight_decay, 8,
                         args, min_8bit_size, percentile_clipping, block_wise,
                         is_paged=True)


class PagedAdam32bit(Optimizer2State):
    def __init__(self, params, lr=1e-3, betas=(0.9, 0.999), eps=1e-8,
                 weight_decay=0, amsgrad=False, optim_bits=32, args=None,
                 min_8bit_size=4096, percentile_clipping=100, block_wise=True,
                 is_paged=False):
        super().__init__("adam", params, lr, betas, eps, weight_decay, 32,
                         args, min_8bit_size, percentile_clipping, block_wise,
                         is_paged=True)


class AnalysisAdam(torch.optim.Optimizer):
    """Adam that performs 8-bit vs 32-bit error analysis.

    This implementation is modified from torch.optim.Adam based on:
    `Fixing Weight Decay Regularization in Adam`
    (see https://arxiv.org/abs/1711.05101)

    It has been proposed in `Adam: A Method for Stochastic Optimization`_.

    Arguments:
        params (iterable): iterable of parameters to optimize or dicts defining
            parameter groups
        lr (float, optional): learning rate (default: 1e-3)
        betas (Tuple[float, float], optional): coefficients used for computing
            running averages of gradient and its square (default: (0.9, 0.999))
        eps (float, optional): term added to the denominator to improve
            numerical stability (default: 1e-8)
        weight_decay (float, optional): weight decay (L2 penalty) (default: 0)
        amsgrad (boolean, optional): whether to use the AMSGrad variant of this
            algorithm from the paper `On the Convergence of Adam and Beyond`_
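        bnb_analysis (str, optional): which quantization scheme to analyze;
            one of "dynamic-blockwise", "dynamic", "linear", "quantile", or
            "my-quantization-routine" (default: "dynamic-blockwise")
        savedir (str, optional): if set, directory to which the accumulated
            error histograms are saved every 100 steps (default: None)

    Example (a minimal sketch; assumes an existing ``torch.nn.Module`` named
    ``model`` and a computed ``loss``)::

        >>> optimizer = AnalysisAdam(
        ...     model.parameters(), lr=1e-3,
        ...     bnb_analysis="dynamic-blockwise", savedir="./analysis")
        >>> loss.backward()
        >>> optimizer.step()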

    .. _Adam: A Method for Stochastic Optimization:
        https://arxiv.org/abs/1412.6980
    .. _On the Convergence of Adam and Beyond:
        https://openreview.net/forum?id=ryQu7f-RZ
    """

    def __init__(
        self,
        params,
        lr=1e-3,
        betas=(0.9, 0.999),
        eps=1e-8,
        weight_decay=0,
        amsgrad=False,
        bnb_analysis="dynamic-blockwise",
        savedir=None,
    ):
        defaults = dict(lr=lr, betas=betas, eps=eps, weight_decay=weight_decay, amsgrad=amsgrad)
        super().__init__(params, defaults)
        self.analysis = bnb_analysis
        self.savedir = savedir

    @property
    def supports_memory_efficient_fp16(self):
        return True

    @property
    def supports_flat_params(self):
        return True

    def step(self, closure=None):
        """Performs a single optimization step.

        Arguments:
            closure (callable, optional): A closure that reevaluates the model
                and returns the loss.
        """
        loss = None
        if closure is not None:
            loss = closure()

        for group in self.param_groups:
            for p_id, p in enumerate(group["params"]):
                if p.grad is None:
                    continue
                grad = p.grad.data
                if grad.dtype in {torch.float16, torch.bfloat16}:
                    grad = grad.float()
                if grad.is_sparse:
                    raise RuntimeError(
                        "Adam does not support sparse gradients, "
                        "please consider SparseAdam instead"
                    )
                amsgrad = group.get("amsgrad", False)
                assert not amsgrad

                p_data_fp32 = p.data
                if p.data.dtype in {torch.float16, torch.bfloat16}:
                    p_data_fp32 = p_data_fp32.float()

                state = self.state[p]

                # State initialization
                if len(state) == 0:
                    state["step"] = 0
                    # Exponential moving average of gradient values
                    state["exp_avg"] = torch.zeros_like(p_data_fp32)
                    # Exponential moving average of squared gradient values
                    state["exp_avg_sq"] = torch.zeros_like(p_data_fp32)
                    # 256x256 histograms indexed by the 8-bit codes of the two
                    # optimizer states, accumulating quantization errors
                    state["abserrors"] = torch.zeros((256, 256), device=p_data_fp32.device)
                    state["relerrors"] = torch.zeros((256, 256), device=p_data_fp32.device)
                    state["counts"] = torch.zeros((256, 256), device=p_data_fp32.device)
                    if amsgrad:
                        # Maintains max of all exp. moving avg. of sq. grad. values
                        state["max_exp_avg_sq"] = torch.zeros_like(p_data_fp32)
                else:
                    state["exp_avg"] = state["exp_avg"].to(p_data_fp32)
                    state["exp_avg_sq"] = state["exp_avg_sq"].to(p_data_fp32)
                    if amsgrad:
                        state["max_exp_avg_sq"] = state["max_exp_avg_sq"].to(p_data_fp32)

                state["step"] += 1
                beta1, beta2 = group["betas"]
                bias_correction1 = 1 - beta1 ** state["step"]
                bias_correction2 = 1 - beta2 ** state["step"]
                step_size = group["lr"] * math.sqrt(bias_correction2) / bias_correction1
                e = state["abserrors"]
                rele = state["relerrors"]
                counts = state["counts"]

                if group["weight_decay"] != 0:
                    p_data_fp32.add_(p_data_fp32, alpha=-group["weight_decay"] * group["lr"])

                exp_avg, exp_avg_sq = state["exp_avg"], state["exp_avg_sq"]
                if amsgrad:
                    max_exp_avg_sq = state["max_exp_avg_sq"]

                # Decay the first and second moment running average coefficient
                exp_avg.mul_(beta1).add_(grad, alpha=1 - beta1)
                exp_avg_sq.mul_(beta2).addcmul_(grad, grad, value=1 - beta2)

                denom = exp_avg_sq.sqrt().add_(group["eps"])
                update_fp32 = exp_avg / denom

                if p_data_fp32.numel() <= 8192 or p_data_fp32.numel() > 50000 * 1000:
                    # Tensor is too small (or an overly large embedding):
                    # apply the plain 32-bit update and skip the analysis.
                    p_data_fp32 += -step_size * update_fp32
                else:
                    if self.analysis == "dynamic-blockwise":
                        code1 = F.create_dynamic_map(signed=True).to(p.device)
                        code2 = F.create_dynamic_map(signed=False).to(p.device)
                        C1, S1 = F.quantize_blockwise(exp_avg, code=code1)
                        state1 = F.dequantize_blockwise(C1, S1)
                        C2, S2 = F.quantize_blockwise(exp_avg_sq, code=code2)
                        state2 = F.dequantize_blockwise(C2, S2)
                    elif self.analysis == "dynamic":
                        code1 = F.create_dynamic_map(signed=True).to(p.device)
                        code2 = F.create_dynamic_map(signed=False).to(p.device)
                        C1, S1 = F.quantize(exp_avg, code=code1)
                        state1 = F.dequantize(C1, S1)
                        C2, S2 = F.quantize(exp_avg_sq, code=code2)
                        state2 = F.dequantize(C2, S2)
                    elif self.analysis == "linear":
                        code1 = F.create_linear_map(signed=True).to(p.device)
                        code2 = F.create_linear_map(signed=False).to(p.device)
                        C1, S1 = F.quantize(exp_avg, code=code1)
                        state1 = F.dequantize(C1, S1)
                        C2, S2 = F.quantize(exp_avg_sq, code=code2)
                        state2 = F.dequantize(C2, S2)
                    elif self.analysis == "quantile":
                        code1 = F.estimate_quantiles(exp_avg)
                        code2 = F.estimate_quantiles(exp_avg_sq)
                        C1 = F.quantize_no_absmax(exp_avg, code=code1)
                        state1 = F.dequantize_no_absmax(C1, code1)
                        C2 = F.quantize_no_absmax(exp_avg_sq, code=code2)
                        state2 = F.dequantize_no_absmax(C2, code2)
                    elif self.analysis == "my-quantization-routine":
                        pass
                        # 1. get code
                        # 2. quantize
                        # 3. dequantize
                        # Error will be calculated automatically!
                    else:
                        raise ValueError(f"Invalid analysis value: {self.analysis}!")

                    denom = state2.sqrt().add_(group["eps"])
                    update_8bit = state1 / denom

                    abserr = torch.abs(update_8bit - update_fp32)
                    relerr = abserr / (torch.abs(update_fp32) + 1e-6)

                    C1, C2 = C1.int(), C2.int()

                    F.histogram_scatter_add_2d(e, C1.int(), C2.int(), abserr)
                    F.histogram_scatter_add_2d(rele, C1.int(), C2.int(), relerr)
                    F.histogram_scatter_add_2d(counts, C1.int(), C2.int(), torch.ones_like(abserr))

                    # The parameter update itself always uses the 32-bit state.
                    p_data_fp32 += -step_size * update_fp32

                    if not dist.is_initialized() or dist.get_rank() == 0:
                        # Guard against savedir=None as well as '' before
                        # touching the filesystem.
                        if self.savedir and state["step"] % 100 == 0:
                            if not os.path.exists(self.savedir):
                                os.makedirs(self.savedir)
                            shapestr = "_".join([str(dim) for dim in p_data_fp32.shape])
                            pathe = os.path.join(self.savedir, f"{p_id}_{shapestr}_abserr.pkl")
                            pathrele = os.path.join(self.savedir, f"{p_id}_{shapestr}_relerr.pkl")
                            pathcounts = os.path.join(self.savedir, f"{p_id}_{shapestr}_counts.pkl")
                            torch.save(e, pathe)
                            torch.save(rele, pathrele)
                            torch.save(counts, pathcounts)

                if p.data.dtype in {torch.float16, torch.bfloat16}:
                    p.data.copy_(p_data_fp32)

        return loss
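
# Usage sketch (an assumption, not part of the library): with `savedir` set,
# the accumulated 256x256 error histograms are written every 100 steps as
# f"{p_id}_{shapestr}_{kind}.pkl" tensors via torch.save. The file name below
# is hypothetical; substitute the id and shape of the parameter of interest.
#
#   abserr = torch.load("analysis/0_1024_1024_abserr.pkl")
#   counts = torch.load("analysis/0_1024_1024_counts.pkl")
#   # Mean absolute 8-bit vs 32-bit update error per (state1, state2) bucket:
#   mean_err = abserr / counts.clamp(min=1)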