o
    h6                  	   @   s  d Z ddlZddlZddlmZ ddlmZmZm	Z	 ddl
Z
ddlZ
ddl
mZ ddlmZ ddlmZmZ dd	lmZ dd
lmZmZmZ ddlmZmZmZmZmZmZ ddlm Z  e!e"Z#dZ$dZ%g dZ&dgZ'eG dd deZ(dd Z)dd Z*dMde
j+de,de-de
j+fddZ.G dd  d ej/Z0G d!d" d"ej/Z1G d#d$ d$ej/Z2G d%d& d&ej/Z3G d'd( d(ej/Z4G d)d* d*ej/Z5G d+d, d,ej/Z6G d-d. d.ej/Z7G d/d0 d0ej/Z8G d1d2 d2ej/Z9G d3d4 d4ej/Z:G d5d6 d6ej/Z;G d7d8 d8ej/Z<G d9d: d:eZ=d;Z>d<Z?ed=e>G d>d? d?e=Z@G d@dA dAej/ZAG dBdC dCej/ZBG dDdE dEej/ZCG dFdG dGej/ZDG dHdI dIej/ZEedJe>G dKdL dLe=ZFdS )Nz# PyTorch Swin2SR Transformer model.    N)	dataclass)OptionalTupleUnion)nn   )ACT2FN)BaseModelOutputImageSuperResolutionOutput)PreTrainedModel) find_pruneable_heads_and_indicesmeshgridprune_linear_layer)ModelOutputadd_code_sample_docstringsadd_start_docstrings%add_start_docstrings_to_model_forwardloggingreplace_return_docstrings   )Swin2SRConfigr   z!caidas/swin2SR-classical-sr-x2-64)r      i  i  c                   @   sL   e Zd ZU dZdZejed< dZe	e
ej  ed< dZe	e
ej  ed< dS )Swin2SREncoderOutputa  
    Swin2SR encoder's outputs, with potential hidden states and attentions.

    Args:
        last_hidden_state (`torch.FloatTensor` of shape `(batch_size, sequence_length, hidden_size)`):
            Sequence of hidden-states at the output of the last layer of the model.
        hidden_states (`tuple(torch.FloatTensor)`, *optional*, returned when `output_hidden_states=True` is passed or when `config.output_hidden_states=True`):
            Tuple of `torch.FloatTensor` (one for the output of the embeddings + one for the output of each stage) of
            shape `(batch_size, sequence_length, hidden_size)`.

            Hidden-states of the model at the output of each layer plus the initial embedding outputs.
        attentions (`tuple(torch.FloatTensor)`, *optional*, returned when `output_attentions=True` is passed or when `config.output_attentions=True`):
            Tuple of `torch.FloatTensor` (one for each stage) of shape `(batch_size, num_heads, sequence_length,
            sequence_length)`.

            Attentions weights after the attention softmax, used to compute the weighted average in the self-attention
            heads.
    Nlast_hidden_statehidden_states
attentions)__name__
__module____qualname____doc__r   torchFloatTensor__annotations__r   r   r   r    r#   r#   b/var/www/html/ai/venv/lib/python3.10/site-packages/transformers/models/swin2sr/modeling_swin2sr.pyr   :   s
   
 r   c                 C   sR   | j \}}}}| ||| ||| ||} | dddddd d|||}|S )z2
    Partitions the given input into windows.
    r   r   r            shapeviewpermute
contiguous)input_featurewindow_size
batch_sizeheightwidthnum_channelswindowsr#   r#   r$   window_partitionU   s   $r5   c                 C   sN   | j d }| d|| || |||} | dddddd d|||} | S )z?
    Merges windows to produce higher resolution features.
    r(   r   r   r   r%   r&   r'   r)   )r4   r/   r1   r2   r3   r#   r#   r$   window_reverseb   s   
$r6           Finput	drop_probtrainingreturnc                 C   sd   |dks|s| S d| }| j d fd| jd   }|tj|| j| jd }|  | || }|S )aF  
    Drop paths (Stochastic Depth) per sample (when applied in main path of residual blocks).

    Comment by Ross Wightman: This is the same as the DropConnect impl I created for EfficientNet, etc networks,
    however, the original name is misleading as 'Drop Connect' is a different form of dropout in a separate paper...
    See discussion: https://github.com/tensorflow/tpu/issues/494#issuecomment-532968956 ... I've opted for changing the
    layer and argument names to 'drop path' rather than mix DropConnect as a layer name and use 'survival rate' as the
    argument.
    r7   r   r   )r   )dtypedevice)r*   ndimr    randr<   r=   floor_div)r8   r9   r:   	keep_probr*   random_tensoroutputr#   r#   r$   	drop_pathm   s   
rE   c                       sT   e Zd ZdZddee ddf fddZdejdejfdd	Z	de
fd
dZ  ZS )Swin2SRDropPathzXDrop paths (Stochastic Depth) per sample (when applied in main path of residual blocks).Nr9   r;   c                    s   t    || _d S N)super__init__r9   )selfr9   	__class__r#   r$   rI      s   

zSwin2SRDropPath.__init__r   c                 C   s   t || j| jS rG   )rE   r9   r:   rJ   r   r#   r#   r$   forward   s   zSwin2SRDropPath.forwardc                 C   s   d | jS )Nzp={})formatr9   rJ   r#   r#   r$   
extra_repr   s   zSwin2SRDropPath.extra_reprrG   )r   r   r   r   r   floatrI   r    TensorrN   strrQ   __classcell__r#   r#   rK   r$   rF      s
    rF   c                       s>   e Zd ZdZ fddZdeej deej	 fddZ
  ZS )Swin2SREmbeddingsz?
    Construct the patch and optional position embeddings.
    c                    s`   t    t|| _| jj}|jr tt	d|d |j
| _nd | _t|j| _|j| _d S )Nr   )rH   rI   Swin2SRPatchEmbeddingspatch_embeddingsnum_patchesuse_absolute_embeddingsr   	Parameterr    zeros	embed_dimposition_embeddingsDropouthidden_dropout_probdropoutr/   )rJ   configrY   rK   r#   r$   rI      s   

zSwin2SREmbeddings.__init__pixel_valuesr;   c                 C   s4   |  |\}}| jd ur|| j }| |}||fS rG   )rX   r^   ra   )rJ   rc   
embeddingsoutput_dimensionsr#   r#   r$   rN      s
   


zSwin2SREmbeddings.forward)r   r   r   r   rI   r   r    r!   r   rS   rN   rU   r#   r#   rK   r$   rV      s    &rV   c                       sD   e Zd Zd fdd	Zdeej deejee	 f fddZ
  ZS )	rW   Tc                    s   t    |j}|j|j}}t|tjjr|n||f}t|tjjr%|n||f}|d |d  |d |d  g}|| _	|d |d  | _
tj||j||d| _|r[t|j| _d S d | _d S )Nr   r   )kernel_sizestride)rH   rI   r]   
image_size
patch_size
isinstancecollectionsabcIterablepatches_resolutionrY   r   Conv2d
projection	LayerNorm	layernorm)rJ   rb   normalize_patchesr3   rh   ri   rn   rK   r#   r$   rI      s   
  zSwin2SRPatchEmbeddings.__init__rd   r;   c                 C   sN   |  |}|j\}}}}||f}|ddd}| jd ur#| |}||fS )Nr%   r   )rp   r*   flatten	transposerr   )rJ   rd   _r1   r2   re   r#   r#   r$   rN      s   


zSwin2SRPatchEmbeddings.forward)T)r   r   r   rI   r   r    r!   r   rS   intrN   rU   r#   r#   rK   r$   rW      s    .rW   c                       (   e Zd ZdZ fddZdd Z  ZS )Swin2SRPatchUnEmbeddingszImage to Patch Unembeddingc                    s   t    |j| _d S rG   )rH   rI   r]   )rJ   rb   rK   r#   r$   rI      s   
z!Swin2SRPatchUnEmbeddings.__init__c                 C   s2   |j \}}}|dd|| j|d |d }|S )Nr   r%   r   )r*   ru   r+   r]   )rJ   rd   x_sizer0   height_widthr3   r#   r#   r$   rN      s   "z Swin2SRPatchUnEmbeddings.forwardr   r   r   r   rI   rN   rU   r#   r#   rK   r$   ry      s    ry   c                	       sh   e Zd ZdZejfdee dedejddf fddZ	d	d
 Z
dejdeeef dejfddZ  ZS )Swin2SRPatchMerginga'  
    Patch Merging Layer.

    Args:
        input_resolution (`Tuple[int]`):
            Resolution of input feature.
        dim (`int`):
            Number of input channels.
        norm_layer (`nn.Module`, *optional*, defaults to `nn.LayerNorm`):
            Normalization layer class.
    input_resolutiondim
norm_layerr;   Nc                    sB   t    || _|| _tjd| d| dd| _|d| | _d S )Nr&   r%   Fbias)rH   rI   r~   r   r   Linear	reductionnorm)rJ   r~   r   r   rK   r#   r$   rI      s
   
zSwin2SRPatchMerging.__init__c                 C   sF   |d dkp|d dk}|r!ddd|d d|d f}t j||}|S )Nr%   r   r   )r   
functionalpad)rJ   r.   r1   r2   
should_pad
pad_valuesr#   r#   r$   	maybe_pad   s
   zSwin2SRPatchMerging.maybe_padr.   input_dimensionsc                 C   s   |\}}|j \}}}|||||}| |||}|d d dd ddd dd d f }|d d dd ddd dd d f }	|d d dd ddd dd d f }
|d d dd ddd dd d f }t||	|
|gd}||dd| }| |}| |}|S )Nr   r%   r   r(   r&   )r*   r+   r   r    catr   r   )rJ   r.   r   r1   r2   r0   r   r3   input_feature_0input_feature_1input_feature_2input_feature_3r#   r#   r$   rN      s   $$$$

zSwin2SRPatchMerging.forward)r   r   r   r   r   rq   r   rw   ModulerI   r   r    rS   rN   rU   r#   r#   rK   r$   r}      s
    **r}   c                       sj   e Zd Zddgf fdd	Zdd Z			ddejd	eej d
eej dee	 de
ej f
ddZ  ZS )Swin2SRSelfAttentionr   c              
      s  t    || dkrtd| d| d|| _t|| | _| j| j | _t|tj	j
r0|n||f| _|| _ttdt|ddf | _ttjddd	d
tjd	dtjd|dd
| _tj| jd d  | jd tjd}tj| jd d  | jd tjd}tt||gddddd d}|d dkr|d d d d d d df  |d d   < |d d d d d d df  |d d   < n.|d d d d d d df  | jd d   < |d d d d d d df  | jd d   < |d9 }t|tt |d  t!d }| j"d|dd t| jd }	t| jd }
tt|	|
gdd}t#|d}|d d d d d f |d d d d d f  }|ddd }|d d d d df  | jd d 7  < |d d d d df  | jd d 7  < |d d d d df  d| jd  d 9  < |$d}| j"d|dd tj| j| j|j%d
| _&tj| j| jdd
| _'tj| j| j|j%d
| _(t)|j*| _+d S )Nr   zThe hidden size (z6) is not a multiple of the number of attention heads ()
   r   r%   i   Tr   inplaceFr<   ij)indexing         ?relative_coords_table)
persistentr(   relative_position_index),rH   rI   
ValueErrornum_attention_headsrw   attention_head_sizeall_head_sizerj   rk   rl   rm   r/   pretrained_window_sizer   r[   r    logoneslogit_scale
Sequentialr   ReLUcontinuous_position_bias_mlparangefloat32stackr   r,   r-   	unsqueezesignlog2absmathregister_bufferrt   sumqkv_biasquerykeyvaluer_   attention_probs_dropout_probra   )rJ   rb   r   	num_headsr/   r   relative_coords_hrelative_coords_wr   coords_hcoords_wcoordscoords_flattenrelative_coordsr   rK   r#   r$   rI     s\   
"&$$
,...&,((,
zSwin2SRSelfAttention.__init__c                 C   s6   |  d d | j| jf }||}|ddddS )Nr(   r   r%   r   r   )sizer   r   r+   r,   )rJ   xnew_x_shaper#   r#   r$   transpose_for_scoresK  s   
z)Swin2SRSelfAttention.transpose_for_scoresNFr   attention_mask	head_maskoutput_attentionsr;   c                 C   s  |j \}}}| |}| | |}	| | |}
| |}tjj|ddtjj|	dddd }t	j
| jtdd }|| }| | jd| j}|| jd | jd | jd  | jd | jd  d}|ddd }d	t	| }||d }|d ur|j d }||| || j|||dd }||dd }|d| j||}tjj|dd}| |}|d ur|| }t	||
}|dddd
 }| d d | jf }||}|r||f}|S |f}|S )Nr(   r   g      Y@)maxr   r   r%      r   )r*   r   r   r   r   r   r   	normalizeru   r    clampr   r   r   expr   r   r+   r   r   r/   r,   r-   sigmoidr   softmaxra   matmulr   r   )rJ   r   r   r   r   r0   r   r3   mixed_query_layer	key_layervalue_layerquery_layerattention_scoresr   relative_position_bias_tablerelative_position_bias
mask_shapeattention_probscontext_layernew_context_layer_shapeoutputsr#   r#   r$   rN   P  sT   

&


zSwin2SRSelfAttention.forwardNNF)r   r   r   rI   r   r    rS   r   r!   boolr   rN   rU   r#   r#   rK   r$   r     s"    ;r   c                       s8   e Zd Z fddZdejdejdejfddZ  ZS )Swin2SRSelfOutputc                    s*   t    t||| _t|j| _d S rG   )rH   rI   r   r   denser_   r   ra   rJ   rb   r   rK   r#   r$   rI     s   
zSwin2SRSelfOutput.__init__r   input_tensorr;   c                 C      |  |}| |}|S rG   r   ra   )rJ   r   r   r#   r#   r$   rN        

zSwin2SRSelfOutput.forwardr   r   r   rI   r    rS   rN   rU   r#   r#   rK   r$   r     s    $r   c                       sd   e Zd Zd fdd	Zdd Z			ddejd	eej d
eej dee	 de
ej f
ddZ  ZS )Swin2SRAttentionr   c                    sL   t    t||||t|tjjr|n||fd| _t||| _	t
 | _d S )Nrb   r   r   r/   r   )rH   rI   r   rj   rk   rl   rm   rJ   r   rD   setpruned_heads)rJ   rb   r   r   r/   r   rK   r#   r$   rI     s   
	zSwin2SRAttention.__init__c                 C   s   t |dkrd S t|| jj| jj| j\}}t| jj|| j_t| jj|| j_t| jj	|| j_	t| j
j|dd| j
_| jjt | | j_| jj| jj | j_| j|| _d S )Nr   r   r   )lenr   rJ   r   r   r   r   r   r   r   rD   r   r   union)rJ   headsindexr#   r#   r$   prune_heads  s   zSwin2SRAttention.prune_headsNFr   r   r   r   r;   c                 C   s6   |  ||||}| |d |}|f|dd   }|S Nr   r   )rJ   rD   )rJ   r   r   r   r   self_outputsattention_outputr   r#   r#   r$   rN     s   zSwin2SRAttention.forwardr   r   )r   r   r   rI   r   r    rS   r   r!   r   r   rN   rU   r#   r#   rK   r$   r     s"    r   c                       2   e Zd Z fddZdejdejfddZ  ZS )Swin2SRIntermediatec                    sJ   t    t|t|j| | _t|jt	rt
|j | _d S |j| _d S rG   )rH   rI   r   r   rw   	mlp_ratior   rj   
hidden_actrT   r   intermediate_act_fnr   rK   r#   r$   rI     s
   
zSwin2SRIntermediate.__init__r   r;   c                 C   r   rG   )r   r   rM   r#   r#   r$   rN        

zSwin2SRIntermediate.forwardr   r#   r#   rK   r$   r     s    r   c                       r   )Swin2SROutputc                    s4   t    tt|j| || _t|j| _	d S rG   )
rH   rI   r   r   rw   r   r   r_   r`   ra   r   rK   r#   r$   rI     s   
zSwin2SROutput.__init__r   r;   c                 C   r   rG   r   rM   r#   r#   r$   rN     r   zSwin2SROutput.forwardr   r#   r#   rK   r$   r     s    r   c                       s   e Zd Zd fdd	Zdd Zdd Zdd	 Z	
		ddejde	e
e
f deej dee dee de	ejejf fddZ  ZS )Swin2SRLayerr   c                    s   t    |j| _|| _|j| _|| _| | t|||| jt|t	j
jr'|n||fd| _tj||jd| _|jdkrAt|jnt | _t||| _t||| _tj||jd| _d S )Nr   epsr7   )rH   rI   chunk_size_feed_forward
shift_sizer/   r~   set_shift_and_window_sizer   rj   rk   rl   rm   	attentionr   rq   layer_norm_epslayernorm_beforedrop_path_raterF   IdentityrE   r   intermediater   rD   layernorm_after)rJ   rb   r   r~   r   r   r   rK   r#   r$   rI     s(   

	zSwin2SRLayer.__init__c                 C   s   t | jtjjr| jn| j| jf}t | jtjjr| jn| j| jf}t|d r/|d  n|d }||d kr;|n|d | _|t | jtjjrL| jn| j| jfkrXd| _d S |d | _d S Nr   )	rj   r/   rk   rl   rm   r   r    	is_tensoritem)rJ   r~   target_window_sizetarget_shift_size
window_dimr#   r#   r$   r     s&   

"
z&Swin2SRLayer.set_shift_and_window_sizec              	   C   s  | j dkrtjd||df|d}td| j t| j | j  t| j  d f}td| j t| j | j  t| j  d f}d}|D ]}|D ]}	||d d ||	d d f< |d7 }qDq@t|| j}
|
d| j| j }
|
d|
d }||dkt	d|dkt	d}|S d }|S )Nr   r   r   r(   r%   g      Yr7   )
r   r    r\   slicer/   r5   r+   r   masked_fillrR   )rJ   r1   r2   r<   img_maskheight_sliceswidth_slicescountheight_slicewidth_slicemask_windows	attn_maskr#   r#   r$   get_attn_mask  s.   

$zSwin2SRLayer.get_attn_maskc                 C   sR   | j || j   | j  }| j || j   | j  }ddd|d|f}tj||}||fS r	  )r/   r   r   r   )rJ   r   r1   r2   	pad_right
pad_bottomr   r#   r#   r$   r   4  s
   zSwin2SRLayer.maybe_padNFr   r   r   r   always_partitionr;   c                 C   s  |s|  | n	 |\}}| \}}	}
|}|||||
}| |||\}}|j\}	}}}	| jdkrBtj|| j | j fdd}n|}t|| j	}|d| j	| j	 |
}| j
|||jd}|d urh||j}| j||||d}|d }|d| j	| j	|
}t|| j	||}| jdkrtj|| j| jfdd}n|}|d dkp|d dk}|r|d d d |d |d d f  }|||| |
}| |}|| | }| |}| |}|| | | }|r||d	 f}|S |f}|S )
Nr   )r   r%   )shiftsdimsr(   r   )r   r   r'   r   )r  r   r+   r   r*   r   r    rollr5   r/   r  r<   tor=   r  r6   r-   r  rE   r  rD   r  )rJ   r   r   r   r   r  r1   r2   r0   rv   channelsshortcutr   
height_pad	width_padshifted_hidden_stateshidden_states_windowsr  attention_outputsr   attention_windowsshifted_windows
was_paddedlayer_outputlayer_outputsr#   r#   r$   rN   ;  sN   

$


zSwin2SRLayer.forward)r   r   )NFF)r   r   r   rI   r  r  r   r    rS   r   rw   r   r!   r   rN   rU   r#   r#   rK   r$   r     s*    
r   c                       s`   e Zd ZdZd fdd	Z		ddejdeeef d	e	ej
 d
e	e deej f
ddZ  ZS )Swin2SRStagezh
    This corresponds to the Residual Swin Transformer Block (RSTB) in the original implementation.
    r   c                    s   t     | _| _t fddt|D | _ jdkr.t	ddd| _
n6 jdkrdtt	d dddtjdd	d
t	d d dddtjdd	d
t	d ddd| _
t dd| _t | _d S )Nc              
      s6   g | ]}t  |d  dkrdn jd  dqS )r%   r   )rb   r   r~   r   r   r   )r   r/   ).0irb   r   r~   r   r   r#   r$   
<listcomp>  s    	z)Swin2SRStage.__init__.<locals>.<listcomp>1convr   r   3convr&   皙?Tnegative_sloper   r   F)rs   )rH   rI   rb   r   r   
ModuleListrangelayersresi_connectionro   convr   	LeakyReLUrW   patch_embedry   patch_unembed)rJ   rb   r   r~   depthr   rE   r   rK   r0  r$   rI     s(   
	

zSwin2SRStage.__init__NFr   r   r   r   r;   c                 C   s   |}|\}}t | jD ]\}}	|d ur|| nd }
|	|||
|}|d }q||||f}| ||}| |}| |\}}|| }||f}|rO||dd  7 }|S r   )	enumerater9  r>  r;  r=  )rJ   r   r   r   r   residualr1   r2   r/  layer_modulelayer_head_maskr,  re   rv   stage_outputsr#   r#   r$   rN     s   

zSwin2SRStage.forwardr   )NF)r   r   r   r   rI   r    rS   r   rw   r   r!   r   rN   rU   r#   r#   rK   r$   r-  {  s     &
r-  c                       sp   e Zd Z fddZ				ddejdeeef deej	 d	ee
 d
ee
 dee
 deeef fddZ  ZS )Swin2SREncoderc                    sj   t    t j| _ | _dd td jt	 jD t
 fddt| jD | _d| _d S )Nc                 S   s   g | ]}|  qS r#   )r  )r.  r   r#   r#   r$   r1    s    z+Swin2SREncoder.__init__.<locals>.<listcomp>r   c                    sd   g | ].}t   jd  d f j|  j| t jd| t jd|d   d dqS )r   r   N)rb   r   r~   r?  r   rE   r   )r-  r]   depthsr   r   )r.  	stage_idxrb   dpr	grid_sizer#   r$   r1    s    
*F)rH   rI   r   rF  
num_stagesrb   r    linspacer  r   r   r7  r8  stagesgradient_checkpointing)rJ   rb   rJ  rK   rH  r$   rI     s   
 

zSwin2SREncoder.__init__NFTr   r   r   r   output_hidden_statesreturn_dictr;   c                 C   s   d}|rdnd }|rdnd }	|r||f7 }t | jD ]L\}
}|d ur&||
 nd }| jr9| jr9| |j||||}n|||||}|d }|d }|d |d f}||f7 }|r\||f7 }|rf|	|dd  7 }	q|sutdd |||	fD S t|||	d	S )
Nr#   r   r   r   r(   r%   c                 s   s    | ]	}|d ur|V  qd S rG   r#   )r.  vr#   r#   r$   	<genexpr>   s    z)Swin2SREncoder.forward.<locals>.<genexpr>r   r   r   )r@  rM  rN  r:   _gradient_checkpointing_func__call__tupler   )rJ   r   r   r   r   rO  rP  all_input_dimensionsall_hidden_statesall_self_attentionsr/  stage_modulerC  r,  re   r#   r#   r$   rN     s8   	


zSwin2SREncoder.forward)NFFT)r   r   r   rI   r    rS   r   rw   r   r!   r   r   r   rN   rU   r#   r#   rK   r$   rE    s*    

rE  c                   @   s(   e Zd ZdZeZdZdZdZdd Z	dS )Swin2SRPreTrainedModelz
    An abstract class to handle weights initialization and a simple interface for downloading and loading pretrained
    models.
    swin2srrc   Tc                 C   sx   t |tjtjfr%tjjj|jj| j	j
d |jdur#|jj  dS dS t |tjr:|jj  |jjd dS dS )zInitialize the weights)stdNr   )rj   r   r   ro   r    inittrunc_normal_weightdatarb   initializer_ranger   zero_rq   fill_)rJ   moduler#   r#   r$   _init_weights  s   
z$Swin2SRPreTrainedModel._init_weightsN)
r   r   r   r   r   config_classbase_model_prefixmain_input_namesupports_gradient_checkpointingrf  r#   r#   r#   r$   r[  	  s    r[  aJ  
    This model is a PyTorch [torch.nn.Module](https://pytorch.org/docs/stable/nn.html#torch.nn.Module) sub-class. Use
    it as a regular PyTorch Module and refer to the PyTorch documentation for all matter related to general usage and
    behavior.

    Parameters:
        config ([`Swin2SRConfig`]): Model configuration class with all the parameters of the model.
            Initializing with a config file does not load the weights associated with the model, only the
            configuration. Check out the [`~PreTrainedModel.from_pretrained`] method to load the model weights.
aN  
    Args:
        pixel_values (`torch.FloatTensor` of shape `(batch_size, num_channels, height, width)`):
            Pixel values. Pixel values can be obtained using [`AutoImageProcessor`]. See
            [`Swin2SRImageProcessor.__call__`] for details.
        head_mask (`torch.FloatTensor` of shape `(num_heads,)` or `(num_layers, num_heads)`, *optional*):
            Mask to nullify selected heads of the self-attention modules. Mask values selected in `[0, 1]`:

            - 1 indicates the head is **not masked**,
            - 0 indicates the head is **masked**.

        output_attentions (`bool`, *optional*):
            Whether or not to return the attentions tensors of all attention layers. See `attentions` under returned
            tensors for more detail.
        output_hidden_states (`bool`, *optional*):
            Whether or not to return the hidden states of all layers. See `hidden_states` under returned tensors for
            more detail.
        return_dict (`bool`, *optional*):
            Whether or not to return a [`~utils.ModelOutput`] instead of a plain tuple.
zaThe bare Swin2SR Model transformer outputting raw hidden-states without any specific head on top.c                       s   e Zd Z fddZdd Zdd Zdd Zeee	e
eed	ed
				ddejdeej dee dee dee deeef fddZ  ZS )Swin2SRModelc                    s   t  | || _|jdkr"|jdkr"d}t|dddd| _n	t	dddd| _|j
| _
t|j|jddd| _t|| _t|| jjjd| _tj|j|jd| _t|| _t|j|jddd| _|   d S )Nr   )gw#?g8EGr?gB`"?r   )rJ  r   )rH   rI   rb   r3   num_channels_outr    rS   r+   meanr\   	img_ranger   ro   r]   first_convolutionrV   rd   rE  rX   rn   encoderrq   r  rr   ry   r>  conv_after_body	post_init)rJ   rb   rgb_meanrK   r#   r$   rI   E  s   

zSwin2SRModel.__init__c                 C   s   | j jS rG   )rd   rX   rP   r#   r#   r$   get_input_embeddings[  s   z!Swin2SRModel.get_input_embeddingsc                 C   s*   |  D ]\}}| jj| j| qdS )z
        Prunes heads of the model. heads_to_prune: dict of {layer_num: list of heads to prune in this layer} See base
        class PreTrainedModel
        N)itemsrp  layerr  r   )rJ   heads_to_prunerv  r   r#   r#   r$   _prune_heads^  s   zSwin2SRModel._prune_headsc                 C   sr   |  \}}}}| jj}|||  | }|||  | }tj|d|d|fd}| j|| _|| j | j }|S )Nr   reflect)	r   rb   r/   r   r   r   rm  type_asrn  )rJ   rc   rv   r1   r2   r/   modulo_pad_heightmodulo_pad_widthr#   r#   r$   pad_and_normalizef  s   zSwin2SRModel.pad_and_normalizevision)
checkpointoutput_typerg  modalityexpected_outputNrc   r   r   rO  rP  r;   c                 C   s   |d ur|n| j j}|d ur|n| j j}|d ur|n| j j}| |t| j j}|j\}}}}| |}| 	|}	| 
|	\}
}| j|
|||||d}|d }| |}| |||f}| ||	 }|sp|f|dd   }|S t||j|jdS )Nr   r   rO  rP  r   r   rS  )rb   r   rO  use_return_dictget_head_maskr   rF  r*   r}  ro  rd   rp  rr   r>  rq  r	   r   r   )rJ   rc   r   r   rO  rP  rv   r1   r2   rd   embedding_outputr   encoder_outputssequence_outputrD   r#   r#   r$   rN   u  s:   

	
zSwin2SRModel.forward)NNNN)r   r   r   rI   rt  rx  r}  r   SWIN2SR_INPUTS_DOCSTRINGr   _CHECKPOINT_FOR_DOCr	   _CONFIG_FOR_DOC_EXPECTED_OUTPUT_SHAPEr    r!   r   r   r   r   rN   rU   r#   r#   rK   r$   rk  @  s<    

rk  c                       rx   )UpsamplezUpsample module.

    Args:
        scale (`int`):
            Scale factor. Supported scales: 2^n and 3.
        num_features (`int`):
            Channel number of intermediate features.
    c                    s   t    || _||d @ dkr=ttt|dD ] }| d| t	|d| ddd | d| t
d qd S |dkrUt	|d| ddd| _t
d| _d S td	| d
)Nr   r   r%   convolution_r&   r   pixelshuffle_	   zScale z/ is not supported. Supported scales: 2^n and 3.)rH   rI   scaler8  rw   r   r   
add_moduler   ro   PixelShuffleconvolutionpixelshuffler   )rJ   r  num_featuresr/  rK   r#   r$   rI     s   
$zUpsample.__init__c                 C   s~   | j | j d @ dkr.ttt| j dD ]}| d| |}| d| |}q|S | j dkr=| |}| |}|S )Nr   r   r%   r  r  r   )r  r8  rw   r   r   __getattr__r  r  )rJ   hidden_stater/  r#   r#   r$   rN     s   


zUpsample.forwardr|   r#   r#   rK   r$   r    s    	r  c                       rx   )UpsampleOneStepa  UpsampleOneStep module (the difference with Upsample is that it always only has 1conv + 1pixelshuffle)

    Used in lightweight SR to save parameters.

    Args:
        scale (int):
            Scale factor. Supported scales: 2^n and 3.
        in_channels (int):
            Channel number of intermediate features.
        out_channels (int):
            Channel number of output features.
    c                    s6   t    t||d | ddd| _t|| _d S )Nr%   r   r   )rH   rI   r   ro   r;  r  pixel_shuffle)rJ   r  in_channelsout_channelsrK   r#   r$   rI     s   
zUpsampleOneStep.__init__c                 C   r   rG   )r;  r  )rJ   r   r#   r#   r$   rN     r   zUpsampleOneStep.forwardr|   r#   r#   rK   r$   r    s    r  c                       $   e Zd Z fddZdd Z  ZS )PixelShuffleUpsamplerc                    sV   t    t|j|ddd| _tjdd| _t|j	|| _
t||jddd| _d S Nr   r   Tr   )rH   rI   r   ro   r]   conv_before_upsampler<  
activationr  upscaleupsamplerl  final_convolutionrJ   rb   r  rK   r#   r$   rI     s
   
zPixelShuffleUpsampler.__init__c                 C   s,   |  |}| |}| |}| |}|S rG   )r  r  r  r  )rJ   r  r   r#   r#   r$   rN     s
   



zPixelShuffleUpsampler.forwardr   r   r   rI   rN   rU   r#   r#   rK   r$   r    s    r  c                       r  )NearestConvUpsamplerc                    s   t    |jdkrtdt|j|ddd| _tjdd| _	t||ddd| _
t||ddd| _t||ddd| _t||jddd| _tjddd| _d S )	Nr&   zNThe nearest+conv upsampler only supports an upscale factor of 4 at the moment.r   r   Tr   r4  r5  )rH   rI   r  r   r   ro   r]   r  r<  r  conv_up1conv_up2conv_hrrl  r  lrelur  rK   r#   r$   rI     s   

zNearestConvUpsampler.__init__c              	   C   sn   |  |}| |}| | tjjj|ddd}| | tjjj|ddd}| 	| | 
|}|S )Nr%   nearest)scale_factormode)r  r  r  r  r    r   r   interpolater  r  r  )rJ   r  reconstructionr#   r#   r$   rN     s   

zNearestConvUpsampler.forwardr  r#   r#   rK   r$   r    s    r  c                       r  )PixelShuffleAuxUpsamplerc              	      s   t    |j| _t|j|ddd| _t|j|ddd| _tj	dd| _
t||jddd| _ttd|dddtj	dd| _t|j|| _t||jddd| _d S r  )rH   rI   r  r   ro   r3   conv_bicubicr]   r  r<  r  conv_auxr   conv_after_auxr  r  rl  r  r  rK   r#   r$   rI   #  s   
$z!PixelShuffleAuxUpsampler.__init__c                 C   s   |  |}| |}| |}| |}| |}| |d d d d d || j d || j f |d d d d d || j d || j f  }| |}||fS rG   )r  r  r  r  r  r  r  r  )rJ   r  bicubicr1   r2   auxr  r#   r#   r$   rN   /  s   




0*
z PixelShuffleAuxUpsampler.forwardr  r#   r#   rK   r$   r  "  s    r  zm
    Swin2SR Model transformer with an upsampler head on top for image super resolution and restoration.
    c                       s   e Zd Z fddZeeeeed						dde	e
j de	e
j de	e
j de	e d	e	e d
e	e deeef fddZ  ZS )Swin2SRForImageSuperResolutionc                    s   t  | t|| _|j| _|j| _d}| jdkr!t||| _n4| jdkr-t||| _n(| jdkr=t	|j|j
|j| _n| jdkrIt||| _nt|j
|jddd| _|   d S )N@   r  pixelshuffle_auxpixelshuffledirectnearest+convr   r   )rH   rI   rk  r\  	upsamplerr  r  r  r  r  r]   rl  r  r   ro   r  rr  r  rK   r#   r$   rI   E  s   




z'Swin2SRForImageSuperResolution.__init__)r  rg  Nrc   r   labelsr   rO  rP  r;   c                 C   s^  |dur|n| j j}|jdd \}}| j jdkr+tjj||| j || j fddd}	| j|||||d}
|
d }| jd	v rD| 	|}n!| jdkr^| 	||	||\}}|| jj
 | jj }n|| | }|| jj
 | jj }|ddddd|| j d|| j f }d}|durtd
|s|f|
dd  }|dur|f| S |S t|||
j|
jdS )a  
        Returns:

        Example:
         ```python
         >>> import torch
         >>> import numpy as np
         >>> from PIL import Image
         >>> import requests

         >>> from transformers import AutoImageProcessor, Swin2SRForImageSuperResolution

         >>> processor = AutoImageProcessor.from_pretrained("caidas/swin2SR-classical-sr-x2-64")
         >>> model = Swin2SRForImageSuperResolution.from_pretrained("caidas/swin2SR-classical-sr-x2-64")

         >>> url = "https://huggingface.co/spaces/jjourney1125/swin2sr/resolve/main/samples/butterfly.jpg"
         >>> image = Image.open(requests.get(url, stream=True).raw)
         >>> # prepare image for the model
         >>> inputs = processor(image, return_tensors="pt")

         >>> # forward pass
         >>> with torch.no_grad():
         ...     outputs = model(**inputs)

         >>> output = outputs.reconstruction.data.squeeze().float().cpu().clamp_(0, 1).numpy()
         >>> output = np.moveaxis(output, source=0, destination=-1)
         >>> output = (output * 255.0).round().astype(np.uint8)  # float32 to uint8
         >>> # you can visualize `output` with `Image.fromarray`
         ```Nr%   r  r  F)r   r  align_cornersr  r   )r  r  r  z'Training is not supported at the momentr   )lossr  r   r   )rb   r  r*   r  r   r   r  r  r\  r  rn  rm  r  NotImplementedErrorr
   r   r   )rJ   rc   r   r  r   rO  rP  r1   r2   r  r   r  r  r  r  rD   r#   r#   r$   rN   _  sJ   (

,z&Swin2SRForImageSuperResolution.forward)NNNNNN)r   r   r   rI   r   r  r   r
   r  r   r    r!   
LongTensorr   r   r   rN   rU   r#   r#   rK   r$   r  >  s2    

r  )r7   F)Gr   collections.abcrk   r   dataclassesr   typingr   r   r   r    torch.utils.checkpointr   activationsr   modeling_outputsr	   r
   modeling_utilsr   pytorch_utilsr   r   r   utilsr   r   r   r   r   r   configuration_swin2srr   
get_loggerr   loggerr  r  r  %SWIN2SR_PRETRAINED_MODEL_ARCHIVE_LISTr   r5   r6   rS   rR   r   rE   r   rF   rV   rW   ry   r}   r   r   r   r   r   r   r-  rE  r[  SWIN2SR_START_DOCSTRINGr  rk  r  r  r  r  r  r  r#   r#   r#   r$   <module>   st    
 7 / GGq&