import inspect
import warnings

import torch

from .state import AcceleratorState, GradientState
from .utils import DistributedType, honor_type, is_tpu_available


if is_tpu_available(check_device=False):
    import torch_xla.core.xla_model as xm


def move_to_device(state, device):
    if isinstance(state, (list, tuple)):
        return honor_type(state, (move_to_device(t, device) for t in state))
    elif isinstance(state, dict):
        return type(state)({k: move_to_device(v, device) for k, v in state.items()})
    elif isinstance(state, torch.Tensor):
        return state.to(device)
    return state


class AcceleratedOptimizer(torch.optim.Optimizer):
    """
    Internal wrapper around a torch optimizer.

    Conditionally performs `step` and `zero_grad` if gradients should be synchronized when performing gradient
    accumulation.

    Args:
        optimizer (`torch.optim.optimizer.Optimizer`):
            The optimizer to wrap.
        device_placement (`bool`, *optional*, defaults to `True`):
            Whether or not the optimizer should handle device placement. If so, it will place the state dictionary of
            `optimizer` on the right device.
        scaler (`torch.cuda.amp.grad_scaler.GradScaler`, *optional*):
            The scaler to use in the step function if training with mixed precision.
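
    Example (a minimal usage sketch, not an exhaustive reference; the model and
    optimizer below are illustrative placeholders):

        ```python
        >>> import torch
        >>> from accelerate import Accelerator

        >>> accelerator = Accelerator()
        >>> model = torch.nn.Linear(4, 2)
        >>> optimizer = torch.optim.AdamW(model.parameters(), lr=1e-3)
        >>> model, optimizer = accelerator.prepare(model, optimizer)
        >>> type(optimizer).__name__
        'AcceleratedOptimizer'
        ```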
    """

    def __init__(self, optimizer, device_placement=True, scaler=None):
        self.optimizer = optimizer
        self.scaler = scaler
        self.accelerator_state = AcceleratorState()
        self.gradient_state = GradientState()
        self.device_placement = device_placement
        self._is_overflow = False

        if self.scaler is not None:
            self._accelerate_step_called = False
            self._optimizer_original_step_method = self.optimizer.step
            self._optimizer_patched_step_method = patch_optimizer_step(self, self.optimizer.step)

        # Handle device placement
        if device_placement:
            state_dict = self.optimizer.state_dict()
            if self.accelerator_state.distributed_type == DistributedType.TPU:
                xm.send_cpu_data_to_device(state_dict, self.accelerator_state.device)
            else:
                state_dict = move_to_device(state_dict, self.accelerator_state.device)
            self.optimizer.load_state_dict(state_dict)

    @property
    def state(self):
        return self.optimizer.state

    @state.setter
    def state(self, state):
        self.optimizer.state = state

    @property
    def param_groups(self):
        return self.optimizer.param_groups

    @param_groups.setter
    def param_groups(self, param_groups):
        self.optimizer.param_groups = param_groups

    @property
    def defaults(self):
        return self.optimizer.defaults

    @defaults.setter
    def defaults(self, defaults):
        self.optimizer.defaults = defaults

    def add_param_group(self, param_group):
        self.optimizer.add_param_group(param_group)

    def load_state_dict(self, state_dict):
        if self.accelerator_state.distributed_type == DistributedType.TPU and self.device_placement:
            xm.send_cpu_data_to_device(state_dict, self.accelerator_state.device)
        self.optimizer.load_state_dict(state_dict)

    def state_dict(self):
        return self.optimizer.state_dict()

    def zero_grad(self, set_to_none=None):
        if self.gradient_state.sync_gradients:
            accept_arg = "set_to_none" in inspect.signature(self.optimizer.zero_grad).parameters
            if accept_arg:
                if set_to_none is None:
                    set_to_none = False
                self.optimizer.zero_grad(set_to_none=set_to_none)
            else:
                if set_to_none is not None:
                    raise ValueError("`set_to_none` for `Optimizer.zero_grad` is not supported by this optimizer.")
                self.optimizer.zero_grad()

    def step(self, closure=None):
        if self.gradient_state.sync_gradients:
            if self.accelerator_state.distributed_type == DistributedType.TPU:
                optimizer_args = {"closure": closure} if closure is not None else {}
                xm.optimizer_step(self.optimizer, optimizer_args=optimizer_args)
            elif self.scaler is not None:
                self.optimizer.step = self._optimizer_patched_step_method

                self.scaler.step(self.optimizer, closure)
                self.scaler.update()

                if not self._accelerate_step_called:
                    # If the optimizer step was skipped, gradient overflow was detected.
                    self._is_overflow = True
                else:
                    self._is_overflow = False
                # Reset the step method to the original one
                self.optimizer.step = self._optimizer_original_step_method
                # Reset the indicator
                self._accelerate_step_called = False
            else:
                self.optimizer.step(closure)
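    # How the mixed-precision branch in `step` detects skipped steps (explanatory
    # annotation, not upstream code): `GradScaler.step` only calls the wrapped
    # optimizer's `step` when no inf/NaN gradients were found during unscaling.
    # Temporarily swapping `optimizer.step` for `patched_step` (defined at the
    # bottom of this module), which sets `_accelerate_step_called` to `True` when
    # the real step runs, lets the wrapper infer after `scaler.step(...)` returns
    # whether the step was silently skipped because of gradient overflow.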
zAcceleratedOptimizer.stepc                    s,   | j jD ]} fdd|d D |d< qd S )Nc                    s   g | ]}  ||qS r   )get)r   pparameters_mapr   r   
<listcomp>   s    z;AcceleratedOptimizer._switch_parameters.<locals>.<listcomp>paramsr=   )r4   rR   rB   r   rQ   r   _switch_parameters   s   z'AcceleratedOptimizer._switch_parametersc                 C   s   t dt | jS )zTWhether or not the optimizer step was done, or skipped because of gradient overflow.zThe `is_overflow` property is deprecated and will be removed in version 1.0 of Accelerate use `optimizer.step_was_skipped` instead.)warningswarnFutureWarningr(   r8   r   r   r   is_overflow   s
   z AcceleratedOptimizer.is_overflowc                 C   s   | j S )z.Whether or not the optimizer step was skipped.)r(   r8   r   r   r   step_was_skipped   s   z%AcceleratedOptimizer.step_was_skippedc                    s    g d  fdd| j  D S )N)r)   r+   r-   c                    s   i | ]\}}| vr||qS r   r   r   _ignored_keysr   r   r      s    z5AcceleratedOptimizer.__getstate__.<locals>.<dictcomp>)__dict__r   r8   r   r[   r   __getstate__   s   z!AcceleratedOptimizer.__getstate__c                 C   s>   | j | | jd urd| _| jj| _t| | jj| _d S d S r"   )	r]   rN   r$   r)   r#   r*   r+   r,   r-   r;   r   r   r   __setstate__   s   

z!AcceleratedOptimizer.__setstate__)TNr	   )__name__
__module____qualname____doc__r5   propertyr    setterr>   r@   rA   r3   r.   rG   r*   rU   rY   rZ   r^   r_   r   r   r   r   r!   &   s6    









	
r!   accelerated_optimizerc                    s    fdd}|S )Nc                     s   d _ | i |S )NT)r)   )argskwargsrf   methodr   r   patched_step   s   z*patch_optimizer_step.<locals>.patched_stepr   )rf   rj   rk   r   ri   r   r,      s   r,   )rE   rV   r   r    r   r   utilsr   r   r   torch_xla.core.xla_modelcore	xla_modelr1   r   optim	Optimizerr!   r,   r   r   r   r   <module>   s   

 
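
# A minimal end-to-end sketch showing how this wrapper is typically obtained and
# exercised. Illustrative only (not part of the upstream module); assumes
# `accelerate` is configured for single-process execution.
if __name__ == "__main__":
    from accelerate import Accelerator

    accelerator = Accelerator()
    model = torch.nn.Linear(4, 2)
    optimizer = torch.optim.SGD(model.parameters(), lr=0.1)
    # `prepare` moves the model to the right device and wraps the optimizer in
    # `AcceleratedOptimizer`.
    model, optimizer = accelerator.prepare(model, optimizer)

    batch = torch.randn(8, 4, device=accelerator.device)
    loss = model(batch).pow(2).mean()
    accelerator.backward(loss)
    optimizer.step()
    optimizer.zero_grad()
    print("step skipped due to gradient overflow:", optimizer.step_was_skipped)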