o
    hտ                     @   s,  d Z ddlZddlmZ ddlmZ ddlmZmZm	Z	 ddl
Z
ddlZ
ddl
mZ ddlmZmZmZ dd	lmZ e rCdd
lmZ ddlmZ ddlmZmZmZmZmZmZmZmZm Z  ddl!m"Z" ddl#m$Z$ ddlm%Z%m&Z&m'Z'm(Z(m)Z) ddl*m+Z+ e(,e-Z.dZ/dZ0ddgZ1dd Z2dd Z3dd Z4G dd dej5Z6G dd dej5Z7G dd  d ej5Z8G d!d" d"ej5Z9G d#d$ d$ej5Z:G d%d& d&ej5Z;G d'd( d(ej5Z<G d)d* d*ej5Z=G d+d, d,ej5Z>G d-d. d.ej5Z?G d/d0 d0ej5Z@G d1d2 d2ej5ZAG d3d4 d4ej5ZBG d5d6 d6ej5ZCG d7d8 d8e"ZDeG d9d: d:eZEd;ZFd<ZGe&d=eFG d>d? d?eDZHe&d@eFG dAdB dBeDZIe&dCeFG dDdE dEeDZJe&dFeFG dGdH dHeDZKe&dIeFG dJdK dKeDZLe&dLeFG dMdN dNeDZMe&dOeFG dPdQ dQeDZNe&dReFG dSdT dTeDZOdS )Uz PyTorch FNet model.    N)	dataclass)partial)OptionalTupleUnion)nn)BCEWithLogitsLossCrossEntropyLossMSELoss   )is_scipy_available)linalg)ACT2FN)	BaseModelOutputBaseModelOutputWithPoolingMaskedLMOutputModelOutputMultipleChoiceModelOutputNextSentencePredictorOutputQuestionAnsweringModelOutputSequenceClassifierOutputTokenClassifierOutput)PreTrainedModel)apply_chunking_to_forward)add_code_sample_docstringsadd_start_docstrings%add_start_docstrings_to_model_forwardloggingreplace_return_docstrings   )
FNetConfigzgoogle/fnet-baser    zgoogle/fnet-largec                 C   s:   | j d }|d|d|f }| tj} td| ||S )z4Applies 2D matrix multiplication to 3D input arrays.r   Nzbij,jk,ni->bnk)shapetypetorch	complex64einsum)xmatrix_dim_onematrix_dim_two
seq_length r*   \/var/www/html/ai/venv/lib/python3.10/site-packages/transformers/models/fnet/modeling_fnet.py_two_dim_matmulF   s   
r,   c                 C   s   t | ||S N)r,   )r&   r'   r(   r*   r*   r+   two_dim_matmulO      r.   c                 C   s4   | }t t| jdd D ]
}tjj||d}q|S )z
    Applies n-dimensional Fast Fourier Transform (FFT) to input array.

    Args:
        x: Input n-dimensional array.

    Returns:
        n-dimensional Fourier transform of input n-dimensional array.
    r   N)axis)reversedrangendimr#   fft)r&   outr0   r*   r*   r+   fftnT   s   
r6   c                       s*   e Zd ZdZ fddZdddZ  ZS )FNetEmbeddingszGConstruct the embeddings from word, position and token_type embeddings.c                    s   t    tj|j|j|jd| _t|j|j| _	t|j
|j| _tj|j|jd| _t|j|j| _t|j| _| jdt|jddd | jdtj| j tjddd d S )	N)padding_idxepsposition_ids)r   F)
persistenttoken_type_idsdtype)super__init__r   	Embedding
vocab_sizehidden_sizepad_token_idword_embeddingsmax_position_embeddingsposition_embeddingstype_vocab_sizetoken_type_embeddings	LayerNormlayer_norm_epsLinear
projectionDropouthidden_dropout_probdropoutregister_bufferr#   arangeexpandzerosr;   sizelongselfconfig	__class__r*   r+   rB   g   s   

zFNetEmbeddings.__init__Nc                 C   s   |d ur	|  }n|  d d }|d }|d u r$| jd d d |f }|d u rNt| drC| jd d d |f }||d |}|}ntj|tj| jjd}|d u rW| 	|}| 
|}	||	 }
| |}|
|7 }
| |
}
| |
}
| |
}
|
S )Nr<   r   r>   r   r@   device)rW   r;   hasattrr>   rU   r#   rV   rX   r_   rG   rK   rI   rL   rO   rR   )rZ   	input_idsr>   r;   inputs_embedsinput_shaper)   buffered_token_type_ids buffered_token_type_ids_expandedrK   
embeddingsrI   r*   r*   r+   forward}   s,   







zFNetEmbeddings.forward)NNNN)__name__
__module____qualname____doc__rB   rg   __classcell__r*   r*   r\   r+   r7   d   s    r7   c                       ,   e Zd Z fddZdd Zdd Z  ZS )FNetBasicFourierTransformc                    s   t    | | d S r-   )rA   rB   _init_fourier_transformrY   r\   r*   r+   rB         
z"FNetBasicFourierTransform.__init__c                 C   s   |j sttjjdd| _d S |jdkrLt rB| dtj	t
|jtjd | dtj	t
|jtjd tt| j| jd| _d S td t| _d S t| _d S )	N)r      dim   dft_mat_hiddenr?   dft_mat_seq)r'   r(   zpSciPy is needed for DFT matrix calculation and is not found. Using TPU optimized fast fourier transform instead.)use_tpu_fourier_optimizationsr   r#   r4   r6   fourier_transformrH   r   rS   tensorr   dftrE   r$   tpu_short_seq_lengthr.   rv   ru   r   warningrY   r*   r*   r+   ro      s$   



z1FNetBasicFourierTransform._init_fourier_transformc                 C   s   |  |j}|fS r-   )rx   real)rZ   hidden_statesoutputsr*   r*   r+   rg      s   z!FNetBasicFourierTransform.forward)rh   ri   rj   rB   ro   rg   rl   r*   r*   r\   r+   rn      s    rn   c                       $   e Zd Z fddZdd Z  ZS )FNetBasicOutputc                    s"   t    tj|j|jd| _d S Nr9   )rA   rB   r   rL   rE   rM   rY   r\   r*   r+   rB      s   
zFNetBasicOutput.__init__c                 C   s   |  || }|S r-   )rL   rZ   r~   input_tensorr*   r*   r+   rg      s   zFNetBasicOutput.forwardrh   ri   rj   rB   rg   rl   r*   r*   r\   r+   r          r   c                       r   )FNetFourierTransformc                    s"   t    t|| _t|| _d S r-   )rA   rB   rn   rZ   r   outputrY   r\   r*   r+   rB      s   

zFNetFourierTransform.__init__c                 C   s$   |  |}| |d |}|f}|S Nr   )rZ   r   )rZ   r~   self_outputsfourier_outputr   r*   r*   r+   rg      s   
zFNetFourierTransform.forwardr   r*   r*   r\   r+   r          r   c                       2   e Zd Z fddZdejdejfddZ  ZS )FNetIntermediatec                    sD   t    t|j|j| _t|jt	rt
|j | _d S |j| _d S r-   )rA   rB   r   rN   rE   intermediate_sizedense
isinstance
hidden_actstrr   intermediate_act_fnrY   r\   r*   r+   rB      s
   
zFNetIntermediate.__init__r~   returnc                 C      |  |}| |}|S r-   )r   r   rZ   r~   r*   r*   r+   rg         

zFNetIntermediate.forwardrh   ri   rj   rB   r#   Tensorrg   rl   r*   r*   r\   r+   r      s    r   c                       s8   e Zd Z fddZdejdejdejfddZ  ZS )
FNetOutputc                    sB   t    t|j|j| _tj|j|jd| _t	|j
| _d S r   )rA   rB   r   rN   r   rE   r   rL   rM   rP   rQ   rR   rY   r\   r*   r+   rB      s   
zFNetOutput.__init__r~   r   r   c                 C   s&   |  |}| |}| || }|S r-   )r   rR   rL   r   r*   r*   r+   rg      s   

zFNetOutput.forwardr   r*   r*   r\   r+   r      s    $r   c                       rm   )	FNetLayerc                    s:   t    |j| _d| _t|| _t|| _t|| _	d S Nr   )
rA   rB   chunk_size_feed_forwardseq_len_dimr   fourierr   intermediater   r   rY   r\   r*   r+   rB      s   


zFNetLayer.__init__c                 C   s0   |  |}|d }t| j| j| j|}|f}|S r   )r   r   feed_forward_chunkr   r   )rZ   r~   self_fourier_outputsr   layer_outputr   r*   r*   r+   rg     s   
zFNetLayer.forwardc                 C   s   |  |}| ||}|S r-   )r   r   )rZ   r   intermediate_outputr   r*   r*   r+   r     s   
zFNetLayer.feed_forward_chunk)rh   ri   rj   rB   rg   r   rl   r*   r*   r\   r+   r      s    r   c                       s&   e Zd Z fddZdddZ  ZS )FNetEncoderc                    s:   t     | _t fddt jD | _d| _d S )Nc                    s   g | ]}t  qS r*   )r   ).0_r[   r*   r+   
<listcomp>  s    z(FNetEncoder.__init__.<locals>.<listcomp>F)	rA   rB   r[   r   
ModuleListr2   num_hidden_layerslayergradient_checkpointingrY   r\   r   r+   rB     s   
 
zFNetEncoder.__init__FTc                 C   s   |rdnd }t | jD ]!\}}|r||f }| jr$| jr$| |j|}n||}|d }q|r4||f }|sAtdd ||fD S t||dS )Nr*   r   c                 s   s    | ]	}|d ur|V  qd S r-   r*   )r   vr*   r*   r+   	<genexpr>1  s    z&FNetEncoder.forward.<locals>.<genexpr>)last_hidden_stater~   )	enumerater   r   training_gradient_checkpointing_func__call__tupler   )rZ   r~   output_hidden_statesreturn_dictall_hidden_statesilayer_modulelayer_outputsr*   r*   r+   rg     s   


zFNetEncoder.forward)FTr   r*   r*   r\   r+   r     s    r   c                       r   )
FNetPoolerc                    s*   t    t|j|j| _t | _d S r-   )rA   rB   r   rN   rE   r   Tanh
activationrY   r\   r*   r+   rB   8  s   
zFNetPooler.__init__r~   r   c                 C   s(   |d d df }|  |}| |}|S r   )r   r   )rZ   r~   first_token_tensorpooled_outputr*   r*   r+   rg   =  s   

zFNetPooler.forwardr   r*   r*   r\   r+   r   7  s    r   c                       r   )FNetPredictionHeadTransformc                    sV   t    t|j|j| _t|jtrt	|j | _
n|j| _
tj|j|jd| _d S r   )rA   rB   r   rN   rE   r   r   r   r   r   transform_act_fnrL   rM   rY   r\   r*   r+   rB   H  s   
z$FNetPredictionHeadTransform.__init__r~   r   c                 C   s"   |  |}| |}| |}|S r-   )r   r   rL   r   r*   r*   r+   rg   Q  s   


z#FNetPredictionHeadTransform.forwardr   r*   r*   r\   r+   r   G  s    	r   c                       rm   )FNetLMPredictionHeadc                    sH   t    t|| _t|j|j| _t	t
|j| _| j| j_d S r-   )rA   rB   r   	transformr   rN   rE   rD   decoder	Parameterr#   rV   biasrY   r\   r*   r+   rB   Y  s
   

zFNetLMPredictionHead.__init__c                 C   r   r-   )r   r   r   r*   r*   r+   rg   d  r   zFNetLMPredictionHead.forwardc                 C   s   | j j| _d S r-   )r   r   rZ   r*   r*   r+   _tie_weightsi  s   z!FNetLMPredictionHead._tie_weights)rh   ri   rj   rB   rg   r   rl   r*   r*   r\   r+   r   X  s    r   c                       r   )FNetOnlyMLMHeadc                    s   t    t|| _d S r-   )rA   rB   r   predictionsrY   r\   r*   r+   rB   o  rp   zFNetOnlyMLMHead.__init__c                 C      |  |}|S r-   )r   )rZ   sequence_outputprediction_scoresr*   r*   r+   rg   s     
zFNetOnlyMLMHead.forwardr   r*   r*   r\   r+   r   n  r   r   c                       r   )FNetOnlyNSPHeadc                    s   t    t|jd| _d S Nrq   )rA   rB   r   rN   rE   seq_relationshiprY   r\   r*   r+   rB   z  s   
zFNetOnlyNSPHead.__init__c                 C   r   r-   )r   )rZ   r   seq_relationship_scorer*   r*   r+   rg   ~  r   zFNetOnlyNSPHead.forwardr   r*   r*   r\   r+   r   y  r   r   c                       r   )FNetPreTrainingHeadsc                    s(   t    t|| _t|jd| _d S r   )rA   rB   r   r   r   rN   rE   r   rY   r\   r*   r+   rB     s   

zFNetPreTrainingHeads.__init__c                 C   s   |  |}| |}||fS r-   )r   r   )rZ   r   r   r   r   r*   r*   r+   rg     s   

zFNetPreTrainingHeads.forwardr   r*   r*   r\   r+   r     r   r   c                   @   s$   e Zd ZdZeZdZdZdd ZdS )FNetPreTrainedModelz
    An abstract class to handle weights initialization and a simple interface for downloading and loading pretrained
    models.
    fnetTc                 C   s   t |tjr |jjjd| jjd |jdur|jj	  dS dS t |tj
rC|jjjd| jjd |jdurA|jj|j 	  dS dS t |tjrX|jj	  |jjd dS dS )zInitialize the weightsg        )meanstdNg      ?)r   r   rN   weightdatanormal_r[   initializer_ranger   zero_rC   r8   rL   fill_)rZ   moduler*   r*   r+   _init_weights  s   

z!FNetPreTrainedModel._init_weightsN)	rh   ri   rj   rk   r    config_classbase_model_prefixsupports_gradient_checkpointingr   r*   r*   r*   r+   r     s    r   c                   @   sV   e Zd ZU dZdZeej ed< dZ	ejed< dZ
ejed< dZeeej  ed< dS )FNetForPreTrainingOutputa  
    Output type of [`FNetForPreTraining`].

    Args:
        loss (*optional*, returned when `labels` is provided, `torch.FloatTensor` of shape `(1,)`):
            Total loss as the sum of the masked language modeling loss and the next sequence prediction
            (classification) loss.
        prediction_logits (`torch.FloatTensor` of shape `(batch_size, sequence_length, config.vocab_size)`):
            Prediction scores of the language modeling head (scores for each vocabulary token before SoftMax).
        seq_relationship_logits (`torch.FloatTensor` of shape `(batch_size, 2)`):
            Prediction scores of the next sequence prediction (classification) head (scores of True/False continuation
            before SoftMax).
        hidden_states (`tuple(torch.FloatTensor)`, *optional*, returned when `output_hidden_states=True` is passed or when `config.output_hidden_states=True`):
            Tuple of `torch.FloatTensor` (one for the output of the embeddings + one for the output of each layer) of
            shape `(batch_size, sequence_length, hidden_size)`. Hidden-states of the model at the output of each layer
            plus the initial embedding outputs.
    Nlossprediction_logitsseq_relationship_logitsr~   )rh   ri   rj   rk   r   r   r#   FloatTensor__annotations__r   r   r~   r   r*   r*   r*   r+   r     s   
 r   aG  
    This model is a PyTorch [torch.nn.Module](https://pytorch.org/docs/stable/nn.html#torch.nn.Module) sub-class. Use
    it as a regular PyTorch Module and refer to the PyTorch documentation for all matter related to general usage and
    behavior.

    Parameters:
        config ([`FNetConfig`]): Model configuration class with all the parameters of the model.
            Initializing with a config file does not load the weights associated with the model, only the
            configuration. Check out the [`~PreTrainedModel.from_pretrained`] method to load the model weights.
a  
    Args:
        input_ids (`torch.LongTensor` of shape `({0})`):
            Indices of input sequence tokens in the vocabulary.

            Indices can be obtained using [`AutoTokenizer`]. See [`PreTrainedTokenizer.encode`] and
            [`PreTrainedTokenizer.__call__`] for details.

            [What are input IDs?](../glossary#input-ids)
        token_type_ids (`torch.LongTensor` of shape `({0})`, *optional*):
            Segment token indices to indicate first and second portions of the inputs. Indices are selected in `[0,
            1]`:

            - 0 corresponds to a *sentence A* token,
            - 1 corresponds to a *sentence B* token.

            [What are token type IDs?](../glossary#token-type-ids)
        position_ids (`torch.LongTensor` of shape `({0})`, *optional*):
            Indices of positions of each input sequence tokens in the position embeddings. Selected in the range `[0,
            config.max_position_embeddings - 1]`.

            [What are position IDs?](../glossary#position-ids)

        inputs_embeds (`torch.FloatTensor` of shape `({0}, hidden_size)`, *optional*):
            Optionally, instead of passing `input_ids` you can choose to directly pass an embedded representation. This
            is useful if you want more control over how to convert *input_ids* indices into associated vectors than the
            model's internal embedding lookup matrix.
        output_hidden_states (`bool`, *optional*):
            Whether or not to return the hidden states of all layers. See `hidden_states` under returned tensors for
            more detail.
        return_dict (`bool`, *optional*):
            Whether or not to return a [`~utils.ModelOutput`] instead of a plain tuple.
z^The bare FNet Model transformer outputting raw hidden-states without any specific head on top.c                       s   e Zd ZdZd fdd	Zdd Zdd Zee	d	e
eeed
						ddeej deej deej deej dee dee deeef fddZ  ZS )	FNetModelz

    The model can behave as an encoder, following the architecture described in [FNet: Mixing Tokens with Fourier
    Transforms](https://arxiv.org/abs/2105.03824) by James Lee-Thorp, Joshua Ainslie, Ilya Eckstein, Santiago Ontanon.

    Tc                    sD   t  | || _t|| _t|| _|rt|nd | _| 	  d S r-   )
rA   rB   r[   r7   rf   r   encoderr   pooler	post_init)rZ   r[   add_pooling_layerr\   r*   r+   rB      s   

zFNetModel.__init__c                 C   s   | j jS r-   rf   rG   r   r*   r*   r+   get_input_embeddings  s   zFNetModel.get_input_embeddingsc                 C   s   || j _d S r-   r   )rZ   valuer*   r*   r+   set_input_embeddings  r/   zFNetModel.set_input_embeddingsbatch_size, sequence_length
checkpointoutput_typer   Nra   r>   r;   rb   r   r   r   c                 C   sv  |d ur|n| j j}|d ur|n| j j}|d ur |d ur td|d ur-| }|\}}	n|d ur>| d d }|\}}	ntd| j jrT|	dkrT| j j|	krTtd|d ur[|jn|j}
|d u rt| j	dr}| j	j
d d d |	f }|||	}|}n	tj|tj|
d}| j	||||d}| j|||d	}|d
 }| jd ur| |nd }|s||f|dd   S t|||jdS )NzDYou cannot specify both input_ids and inputs_embeds at the same timer<   z5You have to specify either input_ids or inputs_embedsrt   zThe `tpu_short_seq_length` in FNetConfig should be set equal to the sequence length being passed to the model when using TPU optimizations.r>   r^   )ra   r;   r>   rb   )r   r   r   r   )r   pooler_outputr~   )r[   r   use_return_dict
ValueErrorrW   rw   r{   r_   r`   rf   r>   rU   r#   rV   rX   r   r   r   r~   )rZ   ra   r>   r;   rb   r   r   rc   
batch_sizer)   r_   rd   re   embedding_outputencoder_outputsr   r   r*   r*   r+   rg     s\   

zFNetModel.forward)T)NNNNNN)rh   ri   rj   rk   rB   r   r   r   FNET_INPUTS_DOCSTRINGformatr   _CHECKPOINT_FOR_DOCr   _CONFIG_FOR_DOCr   r#   
LongTensorr   boolr   r   rg   rl   r*   r*   r\   r+   r     s@    
r   z
    FNet Model with two heads on top as done during the pretraining: a `masked language modeling` head and a `next
    sentence prediction (classification)` head.
    c                       s   e Zd ZddgZ fddZdd Zdd Zee	d	e
eed
								ddeej deej deej deej deej deej dee dee deeef fddZ  ZS )FNetForPreTrainingcls.predictions.decoder.biascls.predictions.decoder.weightc                    ,   t  | t|| _t|| _|   d S r-   )rA   rB   r   r   r   clsr   rY   r\   r*   r+   rB   h     

zFNetForPreTraining.__init__c                 C   
   | j jjS r-   r  r   r   r   r*   r*   r+   get_output_embeddingsq     
z(FNetForPreTraining.get_output_embeddingsc                 C      || j j_d S r-   r  rZ   new_embeddingsr*   r*   r+   set_output_embeddingst     z(FNetForPreTraining.set_output_embeddingsr   r   r   Nra   r>   r;   rb   labelsnext_sentence_labelr   r   r   c	                 C   s   |dur|n| j j}| j||||||d}	|	dd \}
}| |
|\}}d}|durP|durPt }||d| j j|d}||dd|d}|| }|sg||f|	dd  }|dure|f| S |S t||||	jdS )a  
        labels (`torch.LongTensor` of shape `(batch_size, sequence_length)`, *optional*):
            Labels for computing the masked language modeling loss. Indices should be in `[-100, 0, ...,
            config.vocab_size]` (see `input_ids` docstring) Tokens with indices set to `-100` are ignored (masked), the
            loss is only computed for the tokens with labels in `[0, ..., config.vocab_size]`
        next_sentence_label (`torch.LongTensor` of shape `(batch_size,)`, *optional*):
            Labels for computing the next sequence prediction (classification) loss. Input should be a sequence pair
            (see `input_ids` docstring) Indices should be in `[0, 1]`:

            - 0 indicates sequence B is a continuation of sequence A,
            - 1 indicates sequence B is a random sequence.
        kwargs (`Dict[str, any]`, optional, defaults to *{}*):
            Used to hide legacy arguments that have been deprecated.

        Returns:

        Example:

        ```python
        >>> from transformers import AutoTokenizer, FNetForPreTraining
        >>> import torch

        >>> tokenizer = AutoTokenizer.from_pretrained("google/fnet-base")
        >>> model = FNetForPreTraining.from_pretrained("google/fnet-base")
        >>> inputs = tokenizer("Hello, my dog is cute", return_tensors="pt")
        >>> outputs = model(**inputs)
        >>> prediction_logits = outputs.prediction_logits
        >>> seq_relationship_logits = outputs.seq_relationship_logits
        ```Nr>   r;   rb   r   r   rq   r<   )r   r   r   r~   )	r[   r   r   r  r	   viewrD   r   r~   )rZ   ra   r>   r;   rb   r  r  r   r   r   r   r   r   r   
total_lossloss_fctmasked_lm_lossnext_sentence_lossr   r*   r*   r+   rg   w  s4   *	zFNetForPreTraining.forwardNNNNNNNN)rh   ri   rj   _tied_weights_keysrB   r  r  r   r   r   r   r   r  r   r#   r   r  r   r   rg   rl   r*   r*   r\   r+   r  ^  sD    	
	

r  z2FNet Model with a `language modeling` head on top.c                       s   e Zd ZddgZ fddZdd Zdd Zee	d	e
eeed
							ddeej deej deej deej deej dee dee deeef fddZ  ZS )FNetForMaskedLMr  r  c                    r  r-   )rA   rB   r   r   r   r  r   rY   r\   r*   r+   rB     r	  zFNetForMaskedLM.__init__c                 C   r
  r-   r  r   r*   r*   r+   r    r  z%FNetForMaskedLM.get_output_embeddingsc                 C   r  r-   r  r  r*   r*   r+   r    r  z%FNetForMaskedLM.set_output_embeddingsr   r   Nra   r>   r;   rb   r  r   r   r   c                 C   s   |dur|n| j j}| j||||||d}|d }	| |	}
d}|dur5t }||
d| j j|d}|sK|
f|dd  }|durI|f| S |S t||
|jdS )a  
        labels (`torch.LongTensor` of shape `(batch_size, sequence_length)`, *optional*):
            Labels for computing the masked language modeling loss. Indices should be in `[-100, 0, ...,
            config.vocab_size]` (see `input_ids` docstring) Tokens with indices set to `-100` are ignored (masked), the
            loss is only computed for the tokens with labels in `[0, ..., config.vocab_size]`.
        Nr  r   r<   rq   r   logitsr~   )	r[   r   r   r  r	   r  rD   r   r~   )rZ   ra   r>   r;   rb   r  r   r   r   r   r   r  r  r   r*   r*   r+   rg     s&   	
zFNetForMaskedLM.forwardNNNNNNN)rh   ri   rj   r  rB   r  r  r   r   r   r   r   r   r  r   r#   r   r  r   r   rg   rl   r*   r*   r\   r+   r    sF    	
	r  zJFNet Model with a `next sentence prediction (classification)` head on top.c                       s   e Zd Z fddZeedeee	d							dde
ej de
ej de
ej d	e
ej d
e
ej de
e de
e deeef fddZ  ZS )FNetForNextSentencePredictionc                    r  r-   )rA   rB   r   r   r   r  r   rY   r\   r*   r+   rB   
  r	  z&FNetForNextSentencePrediction.__init__r   r  Nra   r>   r;   rb   r  r   r   r   c                 K   s   d|v rt dt |d}|dur|n| jj}| j||||||d}	|	d }
| |
}d}|durBt }||	dd|	d}|sX|f|	dd  }|durV|f| S |S t
|||	jdS )	a  
        labels (`torch.LongTensor` of shape `(batch_size,)`, *optional*):
            Labels for computing the next sequence prediction (classification) loss. Input should be a sequence pair
            (see `input_ids` docstring). Indices should be in `[0, 1]`:

            - 0 indicates sequence B is a continuation of sequence A,
            - 1 indicates sequence B is a random sequence.

        Returns:

        Example:

        ```python
        >>> from transformers import AutoTokenizer, FNetForNextSentencePrediction
        >>> import torch

        >>> tokenizer = AutoTokenizer.from_pretrained("google/fnet-base")
        >>> model = FNetForNextSentencePrediction.from_pretrained("google/fnet-base")
        >>> prompt = "In Italy, pizza served in formal settings, such as at a restaurant, is presented unsliced."
        >>> next_sentence = "The sky is blue due to the shorter wavelength of blue light."
        >>> encoding = tokenizer(prompt, next_sentence, return_tensors="pt")
        >>> outputs = model(**encoding, labels=torch.LongTensor([1]))
        >>> logits = outputs.logits
        >>> assert logits[0, 0] < logits[0, 1]  # next sentence was random
        ```r  zoThe `next_sentence_label` argument is deprecated and will be removed in a future version, use `labels` instead.Nr  r   r<   rq   r  )warningswarnFutureWarningpopr[   r   r   r  r	   r  r   r~   )rZ   ra   r>   r;   rb   r  r   r   kwargsr   r   seq_relationship_scoresr  r  r   r*   r*   r+   rg     s:   '
	
z%FNetForNextSentencePrediction.forwardr!  )rh   ri   rj   rB   r   r   r   r   r   r  r   r#   r   r  r   r   rg   rl   r*   r*   r\   r+   r"    s8    	


r"  z
    FNet Model transformer with a sequence classification/regression head on top (a linear layer on top of the pooled
    output) e.g. for GLUE tasks.
    c                          e Zd Z fddZeedeee	e
d							ddeej deej deej d	eej d
eej dee dee deee	f fddZ  ZS )FNetForSequenceClassificationc                    J   t  | |j| _t|| _t|j| _t	|j
|j| _|   d S r-   rA   rB   
num_labelsr   r   r   rP   rQ   rR   rN   rE   
classifierr   rY   r\   r*   r+   rB   i  s   
z&FNetForSequenceClassification.__init__r   r   Nra   r>   r;   rb   r  r   r   r   c                 C   sh  |dur|n| j j}| j||||||d}|d }	| |	}	| |	}
d}|dur| j jdu rS| jdkr9d| j _n| jdkrO|jtj	ksJ|jtj
krOd| j _nd| j _| j jdkrqt }| jdkrk||
 | }n+||
|}n%| j jdkrt }||
d| j|d}n| j jdkrt }||
|}|s|
f|dd  }|dur|f| S |S t||
|jd	S )
a  
        labels (`torch.LongTensor` of shape `(batch_size,)`, *optional*):
            Labels for computing the sequence classification/regression loss. Indices should be in `[0, ...,
            config.num_labels - 1]`. If `config.num_labels == 1` a regression loss is computed (Mean-Square loss), If
            `config.num_labels > 1` a classification loss is computed (Cross-Entropy).
        Nr  r   
regressionsingle_label_classificationmulti_label_classificationr<   rq   r  )r[   r   r   rR   r.  problem_typer-  r@   r#   rX   intr
   squeezer	   r  r   r   r~   )rZ   ra   r>   r;   rb   r  r   r   r   r   r   r   r  r   r*   r*   r+   rg   t  sF   	



"


z%FNetForSequenceClassification.forwardr!  )rh   ri   rj   rB   r   r   r   r   r   r   r  r   r#   r   r  r   r   rg   rl   r*   r*   r\   r+   r*  a  s@    
	r*  z
    FNet Model with a multiple choice classification head on top (a linear layer on top of the pooled output and a
    softmax) e.g. for RocStories/SWAG tasks.
    c                       r)  )FNetForMultipleChoicec                    s@   t  | t|| _t|j| _t|j	d| _
|   d S r   )rA   rB   r   r   r   rP   rQ   rR   rN   rE   r.  r   rY   r\   r*   r+   rB     s
   
zFNetForMultipleChoice.__init__z(batch_size, num_choices, sequence_lengthr   Nra   r>   r;   rb   r  r   r   r   c                 C   sF  |dur|n| j j}|dur|jd n|jd }|dur%|d|dnd}|dur4|d|dnd}|durC|d|dnd}|durV|d|d|dnd}| j||||||d}	|	d }
| |
}
| |
}|d|}d}|durt }|||}|s|f|	dd  }|dur|f| S |S t	|||	j
dS )aJ  
        labels (`torch.LongTensor` of shape `(batch_size,)`, *optional*):
            Labels for computing the multiple choice classification loss. Indices should be in `[0, ...,
            num_choices-1]` where `num_choices` is the size of the second dimension of the input tensors. (See
            `input_ids` above)
        Nr   r<   r  rq   r  )r[   r   r!   r  rW   r   rR   r.  r	   r   r~   )rZ   ra   r>   r;   rb   r  r   r   num_choicesr   r   r   reshaped_logitsr   r  r   r*   r*   r+   rg     s:   	


zFNetForMultipleChoice.forwardr!  )rh   ri   rj   rB   r   r   r   r   r   r   r  r   r#   r   r  r   r   rg   rl   r*   r*   r\   r+   r5    s@    

	r5  z
    FNet Model with a token classification head on top (a linear layer on top of the hidden-states output) e.g. for
    Named-Entity-Recognition (NER) tasks.
    c                       r)  )FNetForTokenClassificationc                    r+  r-   r,  rY   r\   r*   r+   rB     s   
z#FNetForTokenClassification.__init__r   r   Nra   r>   r;   rb   r  r   r   r   c                 C   s   |dur|n| j j}| j||||||d}|d }	| |	}	| |	}
d}|dur9t }||
d| j|d}|sO|
f|dd  }|durM|f| S |S t||
|j	dS )z
        labels (`torch.LongTensor` of shape `(batch_size, sequence_length)`, *optional*):
            Labels for computing the token classification loss. Indices should be in `[0, ..., config.num_labels - 1]`.
        Nr  r   r<   rq   r  )
r[   r   r   rR   r.  r	   r  r-  r   r~   )rZ   ra   r>   r;   rb   r  r   r   r   r   r   r   r  r   r*   r*   r+   rg     s(   	

z"FNetForTokenClassification.forwardr!  )rh   ri   rj   rB   r   r   r   r   r   r   r  r   r#   r   r  r   r   rg   rl   r*   r*   r\   r+   r9    s@    
	r9  z
    FNet Model with a span classification head on top for extractive question-answering tasks like SQuAD (a linear
    layers on top of the hidden-states output to compute `span start logits` and `span end logits`).
    c                       s   e Zd Z fddZeedeee	e
d								ddeej deej deej d	eej d
eej deej dee dee deee	f fddZ  ZS )FNetForQuestionAnsweringc                    s<   t  | |j| _t|| _t|j|j| _| 	  d S r-   )
rA   rB   r-  r   r   r   rN   rE   
qa_outputsr   rY   r\   r*   r+   rB   R  s
   
z!FNetForQuestionAnswering.__init__r   r   Nra   r>   r;   rb   start_positionsend_positionsr   r   r   c	                 C   s>  |dur|n| j j}| j||||||d}	|	d }
| |
}|jddd\}}|d }|d }d}|dur|durt| dkrL|d}t| dkrY|d}|d}|	d|}|	d|}t
|d}|||}|||}|| d }|s||f|	dd  }|dur|f| S |S t||||	jd	S )
a  
        start_positions (`torch.LongTensor` of shape `(batch_size,)`, *optional*):
            Labels for position (index) of the start of the labelled span for computing the token classification loss.
            Positions are clamped to the length of the sequence (`sequence_length`). Position outside of the sequence
            are not taken into account for computing the loss.
        end_positions (`torch.LongTensor` of shape `(batch_size,)`, *optional*):
            Labels for position (index) of the end of the labelled span for computing the token classification loss.
            Positions are clamped to the length of the sequence (`sequence_length`). Position outside of the sequence
            are not taken into account for computing the loss.
        Nr  r   r   r<   rr   )ignore_indexrq   )r   start_logits
end_logitsr~   )r[   r   r   r;  splitr4  
contiguouslenrW   clampr	   r   r~   )rZ   ra   r>   r;   rb   r<  r=  r   r   r   r   r   r?  r@  r  ignored_indexr  
start_lossend_lossr   r*   r*   r+   rg   ]  sB   	







z FNetForQuestionAnswering.forwardr  )rh   ri   rj   rB   r   r   r   r   r   r   r  r   r#   r   r  r   r   rg   rl   r*   r*   r\   r+   r:  J  sF    	

r:  )Prk   r#  dataclassesr   	functoolsr   typingr   r   r   r#   torch.utils.checkpointr   torch.nnr   r	   r
   utilsr   scipyr   activationsr   modeling_outputsr   r   r   r   r   r   r   r   r   modeling_utilsr   pytorch_utilsr   r   r   r   r   r   configuration_fnetr    
get_loggerrh   loggerr   r  "FNET_PRETRAINED_MODEL_ARCHIVE_LISTr,   r.   r6   Moduler7   rn   r   r   r   r   r   r   r   r   r   r   r   r   r   r   FNET_START_DOCSTRINGr   r   r  r  r"  r*  r5  r9  r:  r*   r*   r*   r+   <module>   s   ,
	=&
#f]BXNH>