import warnings
from typing import Union, Iterable, List, Dict, Tuple, Optional, cast

import torch
from torch import Tensor, inf
from torch.utils._foreach_utils import _group_tensors_by_device_and_dtype, _has_foreach_support

_tensor_or_tensors = Union[torch.Tensor, Iterable[torch.Tensor]]

__all__ = ['clip_grad_norm_', 'clip_grad_norm', 'clip_grad_value_']


def clip_grad_norm_(
        parameters: _tensor_or_tensors, max_norm: float, norm_type: float = 2.0,
        error_if_nonfinite: bool = False, foreach: Optional[bool] = None) -> torch.Tensor:
    r"""Clips gradient norm of an iterable of parameters.

    The norm is computed over all gradients together, as if they were
    concatenated into a single vector. Gradients are modified in-place.

    Args:
        parameters (Iterable[Tensor] or Tensor): an iterable of Tensors or a
            single Tensor that will have gradients normalized
        max_norm (float): max norm of the gradients
        norm_type (float): type of the used p-norm. Can be ``'inf'`` for
            infinity norm.
        error_if_nonfinite (bool): if True, an error is thrown if the total
            norm of the gradients from :attr:`parameters` is ``nan``,
            ``inf``, or ``-inf``. Default: False (will switch to True in the future)
        foreach (bool): use the faster foreach-based implementation.
            If ``None``, use the foreach implementation for CUDA and CPU native tensors and silently
            fall back to the slow implementation for other device types.
            Default: ``None``

    Returns:
        Total norm of the parameter gradients (viewed as a single vector).
    """
    if isinstance(parameters, torch.Tensor):
        parameters = [parameters]
    grads = [p.grad for p in parameters if p.grad is not None]
    max_norm = float(max_norm)
    norm_type = float(norm_type)
    if len(grads) == 0:
        return torch.tensor(0.)
    first_device = grads[0].device
    grouped_grads = _group_tensors_by_device_and_dtype([[g.detach() for g in grads]])

    if norm_type == inf:
        norms = [torch.linalg.vector_norm(g.detach(), inf).to(first_device) for g in grads]
        total_norm = norms[0] if len(norms) == 1 else torch.max(torch.stack(norms))
    else:
        norms = []
        for ((device, _), ([device_grads], _)) in grouped_grads.items():
            if (foreach is None or foreach) and _has_foreach_support(device_grads, device=device):
                norms.extend(torch._foreach_norm(device_grads, norm_type))
            elif foreach:
                raise RuntimeError(f'foreach=True was passed, but can\'t use the foreach API on {device.type} tensors')
            else:
                norms.extend([torch.linalg.vector_norm(g, norm_type) for g in device_grads])

        total_norm = torch.linalg.vector_norm(torch.stack([norm.to(first_device) for norm in norms]), norm_type)

    if error_if_nonfinite and torch.logical_or(total_norm.isnan(), total_norm.isinf()):
        raise RuntimeError(
            f'The total norm of order {norm_type} for gradients from '
            '`parameters` is non-finite, so it cannot be clipped. To disable '
            'this error and scale the gradients by the non-finite norm anyway, '
            'set `error_if_nonfinite=False`')
    clip_coef = max_norm / (total_norm + 1e-6)
    # Multiplying by the clamped coefficient is redundant when it equals 1, but doing so
    # avoids an `if clip_coef < 1:` branch, which could force a CPU <=> device sync
    # when the gradients do not live in CPU memory.
    clip_coef_clamped = torch.clamp(clip_coef, max=1.0)
    for ((device, _), ([device_grads], _)) in grouped_grads.items():
        if (foreach is None or foreach) and _has_foreach_support(device_grads, device=device):
            torch._foreach_mul_(device_grads, clip_coef_clamped.to(device))
        elif foreach:
            raise RuntimeError(f'foreach=True was passed, but can\'t use the foreach API on {device.type} tensors')
        else:
            clip_coef_clamped_device = clip_coef_clamped.to(device)
            for g in device_grads:
                g.detach().mul_(clip_coef_clamped_device)

    return total_norm
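

# A minimal usage sketch (illustrative, not from the original file): in a typical
# training loop, `clip_grad_norm_` is called between the backward pass and the
# optimizer step.  `model`, `optimizer`, and `loss` are assumed to exist elsewhere.
#
#     loss.backward()
#     total_norm = clip_grad_norm_(model.parameters(), max_norm=1.0)
#     optimizer.step()
#
# The returned value is the gradient norm measured *before* clipping, which is
# often logged to monitor training stability.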


def clip_grad_norm(
        parameters: _tensor_or_tensors, max_norm: float, norm_type: float = 2.0,
        error_if_nonfinite: bool = False, foreach: Optional[bool] = None) -> torch.Tensor:
    r"""Clips gradient norm of an iterable of parameters.

    .. warning::
        This method is now deprecated in favor of
        :func:`torch.nn.utils.clip_grad_norm_`.
    """
    warnings.warn("torch.nn.utils.clip_grad_norm is now deprecated in favor "
                  "of torch.nn.utils.clip_grad_norm_.", stacklevel=2)
    return clip_grad_norm_(parameters, max_norm, norm_type, error_if_nonfinite, foreach)


def clip_grad_value_(parameters: _tensor_or_tensors, clip_value: float, foreach: Optional[bool] = None) -> None:
    r"""Clips gradient of an iterable of parameters at specified value.

    Gradients are modified in-place.

    Args:
        parameters (Iterable[Tensor] or Tensor): an iterable of Tensors or a
            single Tensor that will have gradients normalized
        clip_value (float): maximum allowed value of the gradients.
            The gradients are clipped in the range
            :math:`\left[\text{-clip\_value}, \text{clip\_value}\right]`
        foreach (bool): use the faster foreach-based implementation
            If ``None``, use the foreach implementation for CUDA and CPU native tensors and
            silently fall back to the slow implementation for other device types.
            Default: ``None``
    """
    if isinstance(parameters, torch.Tensor):
        parameters = [parameters]
    clip_value = float(clip_value)

    grads = [p.grad for p in parameters if p.grad is not None]
    grouped_grads = _group_tensors_by_device_and_dtype([grads])

    for ((device, _), ([grads], _)) in grouped_grads.items():
        if (foreach is None or foreach) and _has_foreach_support(cast(List[Tensor], grads), device=device):
            torch._foreach_clamp_min_(cast(List[Tensor], grads), -clip_value)
            torch._foreach_clamp_max_(cast(List[Tensor], grads), clip_value)
        elif foreach:
            raise RuntimeError(f'foreach=True was passed, but can\'t use the foreach API on {device.type} tensors')
        else:
            with torch.no_grad():
                for grad in grads:
                    cast(Tensor, grad).clamp_(min=-clip_value, max=clip_value)
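

# A minimal self-contained sketch of both helpers on a toy model; the model, data,
# and thresholds below are arbitrary illustrative choices rather than part of the
# module's API.
if __name__ == "__main__":
    model = torch.nn.Linear(4, 2)
    loss = model(torch.randn(8, 4)).pow(2).sum()
    loss.backward()

    # Rescale all gradients together so their combined 2-norm is at most 1.0;
    # the pre-clipping norm is returned for logging.
    total_norm = clip_grad_norm_(model.parameters(), max_norm=1.0)
    print(f"gradient norm before clipping: {total_norm.item():.4f}")

    # Alternatively, clamp each gradient element independently into [-0.25, 0.25].
    clip_grad_value_(model.parameters(), clip_value=0.25)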