import math
import warnings

from torch import Tensor
import torch
from typing import Optional as _Optional


# These no_grad_* functions are necessary as wrappers around the parts of these
# functions that use `with torch.no_grad()`. The JIT doesn't support context
# managers, so these need to be implemented as builtins. Using these wrappers
# lets us keep those builtins small and re-usable.
def _no_grad_uniform_(tensor, a, b):
    with torch.no_grad():
        return tensor.uniform_(a, b)


def _no_grad_normal_(tensor, mean, std):
    with torch.no_grad():
        return tensor.normal_(mean, std)


def _no_grad_trunc_normal_(tensor, mean, std, a, b, generator=None):
    def norm_cdf(x):
        # Computes standard normal cumulative distribution function
        return (1. + math.erf(x / math.sqrt(2.))) / 2.

    if (mean < a - 2 * std) or (mean > b + 2 * std):
        warnings.warn("mean is more than 2 std from [a, b] in nn.init.trunc_normal_. "
                      "The distribution of values may be incorrect.",
                      stacklevel=2)

    with torch.no_grad():
        # Values are generated by using a truncated uniform distribution and
        # then using the inverse CDF for the normal distribution.
        # Get upper and lower cdf values
        l = norm_cdf((a - mean) / std)
        u = norm_cdf((b - mean) / std)

        # Uniformly fill tensor with values from [l, u], then translate to
        # [2l-1, 2u-1].
        tensor.uniform_(2 * l - 1, 2 * u - 1, generator=generator)

        # Use inverse cdf transform for normal distribution to get truncated
        # standard normal
        tensor.erfinv_()

        # Transform to proper mean, std
        tensor.mul_(std * math.sqrt(2.))
        tensor.add_(mean)

        # Clamp to ensure it's in the proper range
        tensor.clamp_(min=a, max=b)
        return tensor


def _no_grad_fill_(tensor, val):
    with torch.no_grad():
        return tensor.fill_(val)


def _no_grad_zero_(tensor):
    with torch.no_grad():
        return tensor.zero_()
def calculate_gain(nonlinearity, param=None):
    r"""Return the recommended gain value for the given nonlinearity function.

    The values are as follows:

    ================= ====================================================
    nonlinearity      gain
    ================= ====================================================
    Linear / Identity :math:`1`
    Conv{1,2,3}D      :math:`1`
    Sigmoid           :math:`1`
    Tanh              :math:`\frac{5}{3}`
    ReLU              :math:`\sqrt{2}`
    Leaky Relu        :math:`\sqrt{\frac{2}{1 + \text{negative\_slope}^2}}`
    SELU              :math:`\frac{3}{4}`
    ================= ====================================================

    .. warning::
        In order to implement `Self-Normalizing Neural Networks`_ ,
        you should use ``nonlinearity='linear'`` instead of ``nonlinearity='selu'``.
        This gives the initial weights a variance of ``1 / N``,
        which is necessary to induce a stable fixed point in the forward pass.
        In contrast, the default gain for ``SELU`` sacrifices the normalization
        effect for more stable gradient flow in rectangular layers.

    Args:
        nonlinearity: the non-linear function (`nn.functional` name)
        param: optional parameter for the non-linear function

    Examples:
        >>> gain = nn.init.calculate_gain('leaky_relu', 0.2)  # leaky_relu with negative_slope=0.2

    .. _Self-Normalizing Neural Networks: https://papers.nips.cc/paper/2017/hash/5d44ee6f2c3f71b73125876103c8f6c4-Abstract.html
    """
    linear_fns = ['linear', 'conv1d', 'conv2d', 'conv3d', 'conv_transpose1d', 'conv_transpose2d', 'conv_transpose3d']
    if nonlinearity in linear_fns or nonlinearity == 'sigmoid':
        return 1
    elif nonlinearity == 'tanh':
        return 5.0 / 3
    elif nonlinearity == 'relu':
        return math.sqrt(2.0)
    elif nonlinearity == 'leaky_relu':
        if param is None:
            negative_slope = 0.01
        elif not isinstance(param, bool) and isinstance(param, int) or isinstance(param, float):
            # True/False are instances of int, hence check above
            negative_slope = param
        else:
            raise ValueError(f"negative_slope {param} not a valid number")
        return math.sqrt(2.0 / (1 + negative_slope ** 2))
    elif nonlinearity == 'selu':
        return 3.0 / 4
    else:
        raise ValueError(f"Unsupported nonlinearity {nonlinearity}")
def uniform_(tensor: Tensor, a: float = 0., b: float = 1.) -> Tensor:
    r"""Fills the input Tensor with values drawn from the uniform
    distribution :math:`\mathcal{U}(a, b)`.

    Args:
        tensor: an n-dimensional `torch.Tensor`
        a: the lower bound of the uniform distribution
        b: the upper bound of the uniform distribution

    Examples:
        >>> w = torch.empty(3, 5)
        >>> nn.init.uniform_(w)
    """
    if torch.overrides.has_torch_function_variadic(tensor):
        return torch.overrides.handle_torch_function(uniform_, (tensor,), tensor=tensor, a=a, b=b)
    return _no_grad_uniform_(tensor, a, b)
def normal_(tensor: Tensor, mean: float = 0., std: float = 1.) -> Tensor:
    r"""Fills the input Tensor with values drawn from the normal
    distribution :math:`\mathcal{N}(\text{mean}, \text{std}^2)`.

    Args:
        tensor: an n-dimensional `torch.Tensor`
        mean: the mean of the normal distribution
        std: the standard deviation of the normal distribution

    Examples:
        >>> w = torch.empty(3, 5)
        >>> nn.init.normal_(w)
    """
    if torch.overrides.has_torch_function_variadic(tensor):
        return torch.overrides.handle_torch_function(normal_, (tensor,), tensor=tensor, mean=mean, std=std)
    return _no_grad_normal_(tensor, mean, std)
def trunc_normal_(
    tensor: Tensor,
    mean: float = 0.,
    std: float = 1.,
    a: float = -2.,
    b: float = 2.,
    generator: _Optional[torch.Generator] = None,
) -> Tensor:
    r"""Fills the input Tensor with values drawn from a truncated
    normal distribution. The values are effectively drawn from the
    normal distribution :math:`\mathcal{N}(\text{mean}, \text{std}^2)`
    with values outside :math:`[a, b]` redrawn until they are within
    the bounds. The method used for generating the random values works
    best when :math:`a \leq \text{mean} \leq b`.

    Args:
        tensor: an n-dimensional `torch.Tensor`
        mean: the mean of the normal distribution
        std: the standard deviation of the normal distribution
        a: the minimum cutoff value
        b: the maximum cutoff value

    Examples:
        >>> w = torch.empty(3, 5)
        >>> nn.init.trunc_normal_(w)
    """
    return _no_grad_trunc_normal_(tensor, mean, std, a, b, generator=generator)

def constant_(tensor: Tensor, val: float) -> Tensor:
    r"""Fills the input Tensor with the value :math:`\text{val}`.

    Args:
        tensor: an n-dimensional `torch.Tensor`
        val: the value to fill the tensor with

    Examples:
        >>> w = torch.empty(3, 5)
        >>> nn.init.constant_(w, 0.3)
    """
    if torch.overrides.has_torch_function_variadic(tensor):
        return torch.overrides.handle_torch_function(constant_, (tensor,), tensor=tensor, val=val)
    return _no_grad_fill_(tensor, val)
def ones_(tensor: Tensor) -> Tensor:
    r"""Fills the input Tensor with the scalar value `1`.

    Args:
        tensor: an n-dimensional `torch.Tensor`

    Examples:
        >>> w = torch.empty(3, 5)
        >>> nn.init.ones_(w)
    """
    return _no_grad_fill_(tensor, 1.)

def zeros_(tensor: Tensor) -> Tensor:
    r"""Fills the input Tensor with the scalar value `0`.

    Args:
        tensor: an n-dimensional `torch.Tensor`

    Examples:
        >>> w = torch.empty(3, 5)
        >>> nn.init.zeros_(w)
    """
    return _no_grad_zero_(tensor)
def eye_(tensor):
    r"""Fills the 2-dimensional input `Tensor` with the identity
    matrix. Preserves the identity of the inputs in `Linear` layers, where as
    many inputs are preserved as possible.

    Args:
        tensor: a 2-dimensional `torch.Tensor`

    Examples:
        >>> w = torch.empty(3, 5)
        >>> nn.init.eye_(w)
    """
    if tensor.ndimension() != 2:
        raise ValueError("Only tensors with 2 dimensions are supported")

    with torch.no_grad():
        torch.eye(*tensor.shape, out=tensor, requires_grad=tensor.requires_grad)
    return tensor
def dirac_(tensor, groups=1):
    r"""Fills the {3, 4, 5}-dimensional input `Tensor` with the Dirac
    delta function. Preserves the identity of the inputs in `Convolutional`
    layers, where as many input channels are preserved as possible. In case
    of groups>1, each group of channels preserves identity

    Args:
        tensor: a {3, 4, 5}-dimensional `torch.Tensor`
        groups (int, optional): number of groups in the conv layer (default: 1)
    Examples:
        >>> w = torch.empty(3, 16, 5, 5)
        >>> nn.init.dirac_(w)
        >>> w = torch.empty(3, 24, 5, 5)
        >>> nn.init.dirac_(w, 3)
    """
    dimensions = tensor.ndimension()
    if dimensions not in [3, 4, 5]:
        raise ValueError("Only tensors with 3, 4, or 5 dimensions are supported")

    sizes = tensor.size()

    if sizes[0] % groups != 0:
        raise ValueError('dim 0 must be divisible by groups')

    out_chans_per_grp = sizes[0] // groups
    min_dim = min(out_chans_per_grp, sizes[1])

    with torch.no_grad():
        tensor.zero_()

        for g in range(groups):
            for d in range(min_dim):
                if dimensions == 3:  # Temporal convolution
                    tensor[g * out_chans_per_grp + d, d, tensor.size(2) // 2] = 1
                elif dimensions == 4:  # Spatial convolution
                    tensor[g * out_chans_per_grp + d, d, tensor.size(2) // 2,
                           tensor.size(3) // 2] = 1
                else:  # Volumetric convolution
                    tensor[g * out_chans_per_grp + d, d, tensor.size(2) // 2,
                           tensor.size(3) // 2, tensor.size(4) // 2] = 1
    return tensor
def _calculate_fan_in_and_fan_out(tensor):
    dimensions = tensor.dim()
    if dimensions < 2:
        raise ValueError("Fan in and fan out can not be computed for tensor with fewer than 2 dimensions")

    num_input_fmaps = tensor.size(1)
    num_output_fmaps = tensor.size(0)
    receptive_field_size = 1
    if tensor.dim() > 2:
        for s in tensor.shape[2:]:
            receptive_field_size *= s
    fan_in = num_input_fmaps * receptive_field_size
    fan_out = num_output_fmaps * receptive_field_size

    return fan_in, fan_out
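
# Fan computation sketch: for a Conv2d weight of shape
# (out_channels, in_channels, kH, kW), fan_in = in_channels * kH * kW and
# fan_out = out_channels * kH * kW (illustrative):
#
#   >>> w = torch.empty(8, 4, 3, 3)
#   >>> _calculate_fan_in_and_fan_out(w)
#   (36, 72)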
def xavier_uniform_(tensor: Tensor, gain: float = 1.) -> Tensor:
    r"""Fills the input `Tensor` with values according to the method
    described in `Understanding the difficulty of training deep feedforward
    neural networks` - Glorot, X. & Bengio, Y. (2010), using a uniform
    distribution. The resulting tensor will have values sampled from
    :math:`\mathcal{U}(-a, a)` where

    .. math::
        a = \text{gain} \times \sqrt{\frac{6}{\text{fan\_in} + \text{fan\_out}}}

    Also known as Glorot initialization.

    Args:
        tensor: an n-dimensional `torch.Tensor`
        gain: an optional scaling factor

    Examples:
        >>> w = torch.empty(3, 5)
        >>> nn.init.xavier_uniform_(w, gain=nn.init.calculate_gain('relu'))
    """
    fan_in, fan_out = _calculate_fan_in_and_fan_out(tensor)
    std = gain * math.sqrt(2.0 / float(fan_in + fan_out))
    a = math.sqrt(3.0) * std  # Calculate uniform bounds from standard deviation

    return _no_grad_uniform_(tensor, -a, a)
def xavier_normal_(tensor: Tensor, gain: float = 1.) -> Tensor:
    r"""Fills the input `Tensor` with values according to the method
    described in `Understanding the difficulty of training deep feedforward
    neural networks` - Glorot, X. & Bengio, Y. (2010), using a normal
    distribution. The resulting tensor will have values sampled from
    :math:`\mathcal{N}(0, \text{std}^2)` where

    .. math::
        \text{std} = \text{gain} \times \sqrt{\frac{2}{\text{fan\_in} + \text{fan\_out}}}

    Also known as Glorot initialization.

    Args:
        tensor: an n-dimensional `torch.Tensor`
        gain: an optional scaling factor

    Examples:
        >>> w = torch.empty(3, 5)
        >>> nn.init.xavier_normal_(w)
    """
    fan_in, fan_out = _calculate_fan_in_and_fan_out(tensor)
    std = gain * math.sqrt(2.0 / float(fan_in + fan_out))

    return _no_grad_normal_(tensor, 0., std)


def _calculate_correct_fan(tensor, mode):
    mode = mode.lower()
    valid_modes = ['fan_in', 'fan_out']
    if mode not in valid_modes:
        raise ValueError(f"Mode {mode} not supported, please use one of {valid_modes}")

    fan_in, fan_out = _calculate_fan_in_and_fan_out(tensor)
    return fan_in if mode == 'fan_in' else fan_out
def kaiming_uniform_(
    tensor: Tensor, a: float = 0, mode: str = 'fan_in', nonlinearity: str = 'leaky_relu'
):
    r"""Fills the input `Tensor` with values according to the method
    described in `Delving deep into rectifiers: Surpassing human-level
    performance on ImageNet classification` - He, K. et al. (2015), using a
    uniform distribution. The resulting tensor will have values sampled from
    :math:`\mathcal{U}(-\text{bound}, \text{bound})` where

    .. math::
        \text{bound} = \text{gain} \times \sqrt{\frac{3}{\text{fan\_mode}}}

    Also known as He initialization.

    Args:
        tensor: an n-dimensional `torch.Tensor`
        a: the negative slope of the rectifier used after this layer (only
            used with ``'leaky_relu'``)
        mode: either ``'fan_in'`` (default) or ``'fan_out'``. Choosing ``'fan_in'``
            preserves the magnitude of the variance of the weights in the
            forward pass. Choosing ``'fan_out'`` preserves the magnitudes in the
            backwards pass.
        nonlinearity: the non-linear function (`nn.functional` name),
            recommended to use only with ``'relu'`` or ``'leaky_relu'`` (default).

    Examples:
        >>> w = torch.empty(3, 5)
        >>> nn.init.kaiming_uniform_(w, mode='fan_in', nonlinearity='relu')
    """
    if torch.overrides.has_torch_function_variadic(tensor):
        return torch.overrides.handle_torch_function(
            kaiming_uniform_,
            (tensor,),
            tensor=tensor,
            a=a,
            mode=mode,
            nonlinearity=nonlinearity)

    if 0 in tensor.shape:
        warnings.warn("Initializing zero-element tensors is a no-op")
        return tensor
    fan = _calculate_correct_fan(tensor, mode)
    gain = calculate_gain(nonlinearity, a)
    std = gain / math.sqrt(fan)
    bound = math.sqrt(3.0) * std  # Calculate uniform bounds from standard deviation
    with torch.no_grad():
        return tensor.uniform_(-bound, bound)
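
# Bound sketch: with nonlinearity='relu' and mode='fan_in',
# bound = sqrt(2) * sqrt(3 / fan_in) = sqrt(6 / fan_in). Illustrative check
# for a (3, 5) weight (fan_in=5):
#
#   >>> w = torch.empty(3, 5)
#   >>> nn.init.kaiming_uniform_(w, mode='fan_in', nonlinearity='relu')
#   >>> bool((w.abs() <= math.sqrt(6.0 / 5)).all())
#   True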
def kaiming_normal_(
    tensor: Tensor, a: float = 0, mode: str = 'fan_in', nonlinearity: str = 'leaky_relu'
):
    r"""Fills the input `Tensor` with values according to the method
    described in `Delving deep into rectifiers: Surpassing human-level
    performance on ImageNet classification` - He, K. et al. (2015), using a
    normal distribution. The resulting tensor will have values sampled from
    :math:`\mathcal{N}(0, \text{std}^2)` where

    .. math::
        \text{std} = \frac{\text{gain}}{\sqrt{\text{fan\_mode}}}

    Also known as He initialization.

    Args:
        tensor: an n-dimensional `torch.Tensor`
        a: the negative slope of the rectifier used after this layer (only
            used with ``'leaky_relu'``)
        mode: either ``'fan_in'`` (default) or ``'fan_out'``. Choosing ``'fan_in'``
            preserves the magnitude of the variance of the weights in the
            forward pass. Choosing ``'fan_out'`` preserves the magnitudes in the
            backwards pass.
        nonlinearity: the non-linear function (`nn.functional` name),
            recommended to use only with ``'relu'`` or ``'leaky_relu'`` (default).

    Examples:
        >>> w = torch.empty(3, 5)
        >>> nn.init.kaiming_normal_(w, mode='fan_out', nonlinearity='relu')
    """
    if 0 in tensor.shape:
        warnings.warn("Initializing zero-element tensors is a no-op")
        return tensor
    fan = _calculate_correct_fan(tensor, mode)
    gain = calculate_gain(nonlinearity, a)
    std = gain / math.sqrt(fan)
    with torch.no_grad():
        return tensor.normal_(0, std)
def orthogonal_(tensor, gain=1):
    r"""Fills the input `Tensor` with a (semi) orthogonal matrix, as
    described in `Exact solutions to the nonlinear dynamics of learning in deep
    linear neural networks` - Saxe, A. et al. (2013). The input tensor must have
    at least 2 dimensions, and for tensors with more than 2 dimensions the
    trailing dimensions are flattened.

    Args:
        tensor: an n-dimensional `torch.Tensor`, where :math:`n \geq 2`
        gain: optional scaling factor

    Examples:
        >>> # xdoctest: +REQUIRES(env:TORCH_DOCTEST_LAPACK)
        >>> w = torch.empty(3, 5)
        >>> nn.init.orthogonal_(w)
    """
    if tensor.ndimension() < 2:
        raise ValueError("Only tensors with 2 or more dimensions are supported")

    if tensor.numel() == 0:
        # no-op
        return tensor
    rows = tensor.size(0)
    cols = tensor.numel() // rows
    flattened = tensor.new(rows, cols).normal_(0, 1)

    if rows < cols:
        flattened.t_()

    # Compute the qr factorization
    q, r = torch.linalg.qr(flattened)
    # Make Q uniform according to https://arxiv.org/pdf/math-ph/0609050.pdf
    d = torch.diag(r, 0)
    ph = d.sign()
    q *= ph

    if rows < cols:
        q.t_()

    with torch.no_grad():
        tensor.view_as(q).copy_(q)
        tensor.mul_(gain)
    return tensor
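
# Orthogonality sketch: for a wide matrix (rows < cols) the rows come out
# orthonormal, so w @ w.T is the identity (illustrative only):
#
#   >>> w = torch.empty(3, 5)
#   >>> nn.init.orthogonal_(w)
#   >>> bool(torch.allclose(w @ w.t(), torch.eye(3), atol=1e-6))
#   True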
def sparse_(tensor, sparsity, std=0.01):
    r"""Fills the 2D input `Tensor` as a sparse matrix, where the
    non-zero elements will be drawn from the normal distribution
    :math:`\mathcal{N}(0, 0.01)`, as described in `Deep learning via
    Hessian-free optimization` - Martens, J. (2010).

    Args:
        tensor: an n-dimensional `torch.Tensor`
        sparsity: The fraction of elements in each column to be set to zero
        std: the standard deviation of the normal distribution used to generate
            the non-zero values

    Examples:
        >>> w = torch.empty(3, 5)
        >>> nn.init.sparse_(w, sparsity=0.1)
    """
    if tensor.ndimension() != 2:
        raise ValueError("Only tensors with 2 dimensions are supported")

    rows, cols = tensor.shape
    num_zeros = int(math.ceil(sparsity * rows))

    with torch.no_grad():
        tensor.normal_(0, std)
        for col_idx in range(cols):
            row_indices = torch.randperm(rows)
            zero_indices = row_indices[:num_zeros]
            tensor[zero_indices, col_idx] = 0
    return tensor
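
# Sparsity sketch: each column gets exactly ceil(sparsity * rows) zeroed
# entries; the remaining entries are N(0, std**2) draws (illustrative):
#
#   >>> w = torch.empty(10, 4)
#   >>> nn.init.sparse_(w, sparsity=0.3)
#   >>> [int((w[:, j] == 0).sum()) for j in range(4)]
#   [3, 3, 3, 3]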
def _make_deprecate(meth):
    new_name = meth.__name__
    old_name = new_name[:-1]

    def deprecated_init(*args, **kwargs):
        warnings.warn(f"nn.init.{old_name} is now deprecated in favor of nn.init.{new_name}.",
                      stacklevel=2)
        return meth(*args, **kwargs)

    deprecated_init.__doc__ = fr"""
    {old_name}(...)

    .. warning::
        This method is now deprecated in favor of :func:`torch.nn.init.{new_name}`.

    See :func:`~torch.nn.init.{new_name}` for details."""
    deprecated_init.__name__ = old_name
    return deprecated_init


# for backward compatibility
uniform = _make_deprecate(uniform_)
normal = _make_deprecate(normal_)
constant = _make_deprecate(constant_)
eye = _make_deprecate(eye_)
dirac = _make_deprecate(dirac_)
xavier_uniform = _make_deprecate(xavier_uniform_)
xavier_normal = _make_deprecate(xavier_normal_)
kaiming_uniform = _make_deprecate(kaiming_uniform_)
kaiming_normal = _make_deprecate(kaiming_normal_)
orthogonal = _make_deprecate(orthogonal_)
sparse = _make_deprecate(sparse_)
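
# Each deprecated alias forwards to its trailing-underscore counterpart and
# warns on use (illustrative):
#
#   >>> w = torch.empty(3, 5)
#   >>> nn.init.uniform(w)  # UserWarning: nn.init.uniform is now deprecated
#   ...                     # in favor of nn.init.uniform_.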