from typing import Dict, List, Optional, Tuple

import torch
import torch.optim._functional as F
from torch import Tensor

__all__: List[str] = []


# TorchScript-compatible functional AdamW optimizer. Instead of reading
# ``param.grad``, callers pass gradients explicitly to ``step()`` /
# ``step_param()``, so gradients and parameters stay separate and multiple
# trainer threads never race on the same ``.grad`` buffer. This class is an
# internal helper for the distributed optimizer and is not meant to be used
# directly.
@torch.jit.script
class _FunctionalAdamW:
    def __init__(
        self,
        params: List[Tensor],
        lr: float = 1e-3,
        betas: Tuple[float, float] = (0.9, 0.999),
        eps: float = 1e-8,
        weight_decay: float = 1e-2,
        amsgrad: bool = False,
        maximize: bool = False,
        foreach: bool = False,
        fused: bool = False,
        _allow_empty_param_list: bool = False,
    ):
        if not 0.0 <= lr:
            raise ValueError(f"Invalid learning rate: {lr}")
        if not 0.0 <= eps:
            raise ValueError(f"Invalid epsilon value: {eps}")
        if not 0.0 <= betas[0] < 1.0:
            raise ValueError(f"Invalid beta parameter at index 0: {betas[0]}")
        if not 0.0 <= betas[1] < 1.0:
            raise ValueError(f"Invalid beta parameter at index 1: {betas[1]}")
        if not 0.0 <= weight_decay:
            raise ValueError(f"Invalid weight_decay value: {weight_decay}")

        self.defaults = {
            "lr": lr,
            "eps": eps,
            "beta1": betas[0],
            "beta2": betas[1],
            "weight_decay": weight_decay,
        }
        self.amsgrad = amsgrad
        self.maximize = maximize
        self.foreach = foreach
        self.fused = fused
        self.state = torch.jit.annotate(Dict[torch.Tensor, Dict[str, torch.Tensor]], {})

        if len(params) == 0 and not _allow_empty_param_list:
            raise ValueError("optimizer got an empty parameter list")

        # There is only one param_group; adding extra groups is not supported.
        self.param_group = {"params": params}

    # Apply one AdamW update to a single parameter, given an explicit gradient.
    def step_param(self, param: Tensor, grad: Optional[Tensor]):
        params_with_grad = []
        grads = []
        exp_avgs = []
        exp_avg_sqs = []
        max_exp_avg_sqs = []
        state_steps: List[Tensor] = []
        if grad is not None:
            params_with_grad.append(param)
            grads.append(grad)
        # Lazy state initialization.
        if param not in self.state:
            self.state[param] = {}
            state = self.state[param]
            state["step"] = torch.tensor(0.0)
            # Exponential moving averages of gradient and squared gradient values.
            state["exp_avg"] = torch.zeros_like(param, memory_format=torch.preserve_format)
            state["exp_avg_sq"] = torch.zeros_like(param, memory_format=torch.preserve_format)
            if self.amsgrad:
                # Maintains the max of all exp. moving averages of squared gradients.
                state["max_exp_avg_sq"] = torch.zeros_like(param, memory_format=torch.preserve_format)

        state = self.state[param]
        exp_avgs.append(state["exp_avg"])
        exp_avg_sqs.append(state["exp_avg_sq"])
        if self.amsgrad:
            max_exp_avg_sqs.append(state["max_exp_avg_sq"])
        state_steps.append(state["step"])

        with torch.no_grad():
            F.adamw(
                params_with_grad,
                grads,
                exp_avgs,
                exp_avg_sqs,
                max_exp_avg_sqs,
                state_steps,
                amsgrad=self.amsgrad,
                maximize=self.maximize,
                lr=self.defaults["lr"],
                beta1=self.defaults["beta1"],
                beta2=self.defaults["beta2"],
                weight_decay=self.defaults["weight_decay"],
                eps=self.defaults["eps"],
                foreach=self.foreach,
                fused=self.fused,
                grad_scale=None,
                found_inf=None,
            )

    # Apply one AdamW update to every parameter, given one gradient per parameter.
    def step(self, gradients: List[Optional[Tensor]]):
        params = self.param_group["params"]
        params_with_grad = []
        grads = []
        exp_avgs = []
        exp_avg_sqs = []
        max_exp_avg_sqs = []
        state_steps: List[Tensor] = []

        if len(params) != len(gradients):
            raise ValueError(
                "the gradients passed in does not equal to the size of the parameters!"
                + f"Params length: {len(params)}. "
                + f"Gradients length: {len(gradients)}"
            )

        for param, gradient in zip(self.param_group["params"], gradients):
            if gradient is not None:
                params_with_grad.append(param)
                grads.append(gradient)
                # Lazy state initialization.
                if param not in self.state:
                    self.state[param] = {}
                    state = self.state[param]
                    state["step"] = torch.tensor(0.0)
                    # Exponential moving averages of gradient and squared gradient values.
                    state["exp_avg"] = torch.zeros_like(param, memory_format=torch.preserve_format)
                    state["exp_avg_sq"] = torch.zeros_like(param, memory_format=torch.preserve_format)
                    if self.amsgrad:
                        # Maintains the max of all exp. moving averages of squared gradients.
                        state["max_exp_avg_sq"] = torch.zeros_like(param, memory_format=torch.preserve_format)

                state = self.state[param]
                exp_avgs.append(state["exp_avg"])
                exp_avg_sqs.append(state["exp_avg_sq"])
                if self.amsgrad:
                    max_exp_avg_sqs.append(state["max_exp_avg_sq"])
                state_steps.append(state["step"])

        with torch.no_grad():
            F.adamw(
                params_with_grad,
                grads,
                exp_avgs,
                exp_avg_sqs,
                max_exp_avg_sqs,
                state_steps,
                amsgrad=self.amsgrad,
                maximize=self.maximize,
                lr=self.defaults["lr"],
                beta1=self.defaults["beta1"],
                beta2=self.defaults["beta2"],
                weight_decay=self.defaults["weight_decay"],
                eps=self.defaults["eps"],
                foreach=self.foreach,
                fused=self.fused,
                grad_scale=None,
                found_inf=None,
            )
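
# ---------------------------------------------------------------------------
# Illustrative usage sketch (an editorial addition, not part of the upstream
# PyTorch module). _FunctionalAdamW is an internal helper: the distributed
# optimizer constructs it and hands gradients in explicitly instead of reading
# ``param.grad``. Running this file directly exercises both entry points on
# toy tensors; the shapes and values below are arbitrary.
# ---------------------------------------------------------------------------
if __name__ == "__main__":
    # Stand-ins for model parameters and externally computed gradients.
    _params = [torch.randn(4), torch.randn(4)]
    _grads: List[Optional[Tensor]] = [torch.randn(4), torch.randn(4)]

    _opt = _FunctionalAdamW(_params)

    # Update all parameters at once from an explicit gradient list...
    _opt.step(_grads)
    # ...or update a single parameter at a time.
    _opt.step_param(_params[0], _grads[0])
    print(_params[0])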