o
    h                     @   s\  d Z ddlZddlmZ ddlmZmZmZmZ ddl	Z	ddl
Z	ddl	mZ ddlmZ ddlmZ dd	lmZmZmZ dd
lmZ ddlmZmZmZ ddlmZmZmZmZmZ ddl m!Z! e"e#Z$dZ%dZ&ddgZ'dZ(dZ)eG dd deZ*G dd dej+Z,G dd dej+Z-G dd dej+Z.G dd dej+Z/G dd dej+Z0G d d! d!ej+Z1G d"d# d#ej+Z2G d$d% d%ej+Z3G d&d' d'ej+Z4G d(d) d)ej+Z5G d*d+ d+ej+Z6G d,d- d-ej+Z7G d.d/ d/ej+Z8G d0d1 d1eZ9ed2e(G d3d4 d4e9Z:ed5e(G d6d7 d7e9Z;ed8e(G d9d: d:e9Z<ed;e(G d<d= d=e9Z=dS )>z PyTorch Bros model.    N)	dataclass)ListOptionalTupleUnion)nn)CrossEntropyLoss   )ACT2FN))BaseModelOutputWithPastAndCrossAttentions,BaseModelOutputWithPoolingAndCrossAttentionsTokenClassifierOutput)PreTrainedModel)apply_chunking_to_forward find_pruneable_heads_and_indicesprune_linear_layer)ModelOutputadd_start_docstrings%add_start_docstrings_to_model_forwardloggingreplace_return_docstrings   )
BrosConfigzjinho8345/bros-base-uncasedr   zjinho8345/bros-large-uncasedaK  
    This model is also a PyTorch [torch.nn.Module](https://pytorch.org/docs/stable/nn.html#torch.nn.Module) subclass.
    Use it as a regular PyTorch Module and refer to the PyTorch documentation for all matter related to general usage
    and behavior.

    Parameters:
        config ([`BrosConfig`]): Model configuration class with all the parameters of the model.
            Initializing with a config file does not load the weights associated with the model, only the
            configuration. Check out the [`~PreTrainedModel.from_pretrained`] method to load the model weights.
a  
    Args:
        input_ids (`torch.LongTensor` of shape `({0})`):
            Indices of input sequence tokens in the vocabulary.

            Indices can be obtained using [`BrosProcessor`]. See [`PreTrainedTokenizer.encode`] and
            [`PreTrainedTokenizer.__call__`] for details.

            [What are input IDs?](../glossary#input-ids)

        bbox ('torch.FloatTensor' of shape '(batch_size, num_boxes, 4)'):
            Bounding box coordinates for each token in the input sequence. Each bounding box is a list of four values
            (x1, y1, x2, y2), where (x1, y1) is the top left corner, and (x2, y2) is the bottom right corner of the
            bounding box.

        attention_mask (`torch.FloatTensor` of shape `({0})`, *optional*):
            Mask to avoid performing attention on padding token indices. Mask values selected in `[0, 1]`:

            - 1 for tokens that are **not masked**,
            - 0 for tokens that are **masked**.

            [What are attention masks?](../glossary#attention-mask)

        bbox_first_token_mask (`torch.FloatTensor` of shape `({0})`, *optional*):
            Mask to indicate the first token of each bounding box. Mask values selected in `[0, 1]`:

            - 1 for tokens that are **not masked**,
            - 0 for tokens that are **masked**.

        token_type_ids (`torch.LongTensor` of shape `({0})`, *optional*):
            Segment token indices to indicate first and second portions of the inputs. Indices are selected in `[0,
            1]`:

            - 0 corresponds to a *sentence A* token,
            - 1 corresponds to a *sentence B* token.

            [What are token type IDs?](../glossary#token-type-ids)

        position_ids (`torch.LongTensor` of shape `({0})`, *optional*):
            Indices of positions of each input sequence tokens in the position embeddings. Selected in the range `[0,
            config.max_position_embeddings - 1]`.

            [What are position IDs?](../glossary#position-ids)

        head_mask (`torch.FloatTensor` of shape `(num_heads,)` or `(num_layers, num_heads)`, *optional*):
            Mask to nullify selected heads of the self-attention modules. Mask values selected in `[0, 1]`:

            - 1 indicates the head is **not masked**,
            - 0 indicates the head is **masked**.

        inputs_embeds (`torch.FloatTensor` of shape `({0}, hidden_size)`, *optional*):
            Optionally, instead of passing `input_ids` you can choose to directly pass an embedded representation. This
            is useful if you want more control over how to convert `input_ids` indices into associated vectors than the
            model's internal embedding lookup matrix.

        output_attentions (`bool`, *optional*):
            Whether or not to return the attentions tensors of all attention layers. See `attentions` under returned
            tensors for more detail.

        output_hidden_states (`bool`, *optional*):
            Whether or not to return the hidden states of all layers. See `hidden_states` under returned tensors for
            more detail.

        return_dict (`bool`, *optional*):
            Whether or not to return a [`~file_utils.ModelOutput`] instead of a plain tuple.
c                   @   sl   e Zd ZU dZdZeej ed< dZ	ejed< dZ
ejed< dZeeej  ed< dZeeej  ed< dS )BrosSpadeOutputa  
    Base class for outputs of token classification models.

    Args:
        loss (`torch.FloatTensor` of shape `(1,)`, *optional*, returned when `labels` is provided) :
            Classification loss.
        initial_token_logits (`torch.FloatTensor` of shape `(batch_size, sequence_length, config.num_labels)`):
            Classification scores for entity initial tokens (before SoftMax).
        subsequent_token_logits (`torch.FloatTensor` of shape `(batch_size, sequence_length, sequence_length+1)`):
            Classification scores for entity sequence tokens (before SoftMax).
        hidden_states (`tuple(torch.FloatTensor)`, *optional*, returned when `output_hidden_states=True` is passed or when `config.output_hidden_states=True`):
            Tuple of `torch.FloatTensor` (one for the output of the embeddings, if the model has an embedding layer, +
            one for the output of each layer) of shape `(batch_size, sequence_length, hidden_size)`.

            Hidden-states of the model at the output of each layer plus the optional initial embedding outputs.
        attentions (`tuple(torch.FloatTensor)`, *optional*, returned when `output_attentions=True` is passed or when `config.output_attentions=True`):
            Tuple of `torch.FloatTensor` (one for each layer) of shape `(batch_size, num_heads, sequence_length,
            sequence_length)`.

            Attentions weights after the attention softmax, used to compute the weighted average in the self-attention
            heads.
    Nlossinitial_token_logitssubsequent_token_logitshidden_states
attentions)__name__
__module____qualname____doc__r   r   torchFloatTensor__annotations__r   r   r   r   r    r&   r&   \/var/www/html/ai/venv/lib/python3.10/site-packages/transformers/models/bros/modeling_bros.pyr      s   
 r   c                       2   e Zd Z fddZdejdejfddZ  ZS )BrosPositionalEmbedding1Dc                    sD   t t|   |j| _ddtd| jd| j   }| d| d S )Nr   i'          g       @inv_freq)superr)   __init__dim_bbox_sinusoid_emb_1dr#   arangeregister_buffer)selfconfigr+   	__class__r&   r'   r-      s   z"BrosPositionalEmbedding1D.__init__pos_seqreturnc                 C   sX   |  }|\}}}||||d| jddd| jd  }tj| | gdd}|S )Nr      dim)sizeviewr+   r.   r#   catsincos)r1   r5   seq_sizeb1b2b3sinusoid_inppos_embr&   r&   r'   forward   s
   
(z!BrosPositionalEmbedding1D.forwardr   r    r!   r-   r#   TensorrF   __classcell__r&   r&   r3   r'   r)      s    
r)   c                       r(   )BrosPositionalEmbedding2Dc                    s.   t t|   |j| _t|| _t|| _d S N)r,   rJ   r-   dim_bboxr)   	x_pos_emb	y_pos_embr1   r2   r3   r&   r'   r-      s   
z"BrosPositionalEmbedding2D.__init__bboxr6   c                 C   sd   g }t | jD ]!}|d dkr|| |d|f  q|| |d|f  qtj|dd}|S )Nr7   r   .r8   r9   )rangerL   appendrM   rN   r#   r=   )r1   rP   stackibbox_pos_embr&   r&   r'   rF      s   z!BrosPositionalEmbedding2D.forwardrG   r&   r&   r3   r'   rJ      s    rJ   c                       s,   e Zd Z fddZdejfddZ  ZS )BrosBboxEmbeddingsc                    s2   t t|   t|| _tj|j|jdd| _	d S )NF)bias)
r,   rV   r-   rJ   bbox_sinusoid_embr   Lineardim_bbox_sinusoid_emb_2ddim_bbox_projectionbbox_projectionrO   r3   r&   r'   r-      s   
zBrosBboxEmbeddings.__init__rP   c                 C   s\   | dd}|d d d d d d d f |d d d d d d d f  }| |}| |}|S )Nr   r   )	transposerX   r\   )r1   rP   bbox_tbbox_posrU   r&   r&   r'   rF      s
   8

zBrosBboxEmbeddings.forwardrG   r&   r&   r3   r'   rV      s    rV   c                       sh   e Zd ZdZ fddZ					ddeej deej deej d	eej d
edejfddZ	  Z
S )BrosTextEmbeddingszGConstruct the embeddings from word, position and token_type embeddings.c                    s   t    tj|j|j|jd| _t|j|j| _	t|j
|j| _tj|j|jd| _t|j| _t|dd| _| dt|jd | jdtj| j tj| jjdd	d
 d S )N)padding_idxepsposition_embedding_typeabsoluteposition_ids)r   r8   token_type_idsdtypedeviceF)
persistent)r,   r-   r   	Embedding
vocab_sizehidden_sizepad_token_idword_embeddingsmax_position_embeddingsposition_embeddingstype_vocab_sizetoken_type_embeddings	LayerNormlayer_norm_epsDropouthidden_dropout_probdropoutgetattrrd   r0   r#   r/   expandzerosrf   r;   longrj   rO   r3   r&   r'   r-      s"   

zBrosTextEmbeddings.__init__Nr   	input_idsrg   rf   inputs_embedspast_key_values_lengthr6   c                 C   s   |d ur	|  }n|  d d }|d }|d u r&| jd d ||| f }|d u rPt| drE| jd d d |f }||d |}	|	}ntj|tj| jjd}|d u rY| 	|}| 
|}
||
 }| jdkrp| |}||7 }| |}| |}|S )Nr8   r   rg   r   rh   re   )r;   rf   hasattrrg   r{   r#   r|   r}   rj   rp   rt   rd   rr   ru   ry   )r1   r~   rg   rf   r   r   input_shape
seq_lengthbuffered_token_type_ids buffered_token_type_ids_expandedrt   
embeddingsrr   r&   r&   r'   rF      s,   







zBrosTextEmbeddings.forward)NNNNr   )r   r    r!   r"   r-   r   r#   rH   intrF   rI   r&   r&   r3   r'   r`      s*    r`   c                       s   e Zd Z fddZdejfddZ						ddejd	ejd
eej deej deej deej deeeej	   deej deej fddZ
  ZS )BrosSelfAttentionc                    s   t    |j|j dkrt|dstd|j d|j d|j| _t|j|j | _| j| j | _t	
|j| j| _t	
|j| j| _t	
|j| j| _t	|j| _t|dd| _| jdksf| jd	krw|j| _t	d
|j d | j| _|j| _d S )Nr   embedding_sizezThe hidden size (z6) is not a multiple of the number of attention heads ()rd   re   relative_keyrelative_key_queryr7   r   )r,   r-   rn   num_attention_headsr   
ValueErrorr   attention_head_sizeall_head_sizer   rY   querykeyvaluerw   attention_probs_dropout_probry   rz   rd   rq   rl   distance_embedding
is_decoderrO   r3   r&   r'   r-   !  s&   

zBrosSelfAttention.__init__xc                 C   s6   |  d d | j| jf }|j| }|ddddS )Nr8   r   r7   r   r	   )r;   r   r   r<   permute)r1   r   new_x_shaper&   r&   r'   transpose_for_scores9  s   
z&BrosSelfAttention.transpose_for_scoresNFr   rU   attention_mask	head_maskencoder_hidden_statesencoder_attention_maskpast_key_valueoutput_attentionsr6   c	                 C   s  |  |}	|d u}
|
r|d ur|d }|d }|}nP|
r/| | |}| | |}|}n;|d urZ| | |}| | |}tj|d |gdd}tj|d |gdd}n| | |}| | |}| |	}| jrv||f}t||dd}| j	dks| j	dkr|
 d }tj|tj|jd	dd}tj|tj|jd	dd}|| }| || j d }|j|jd
}| j	dkrtd||}|| }n| j	dkrtd||}td||}|| | }|j\}}}}|||||}|g d}td||f}|| }|t| j }|d ur|| }tjdd|}| |}|d ur4|| }t||}|dddd }|
 d d | jf }|j| }|r\||fn|f}| jrh||f }|S )Nr   r   r7   r9   r8   r   r   rh   ri   zbhld,lrd->bhlrzbhrd,lrd->bhlr)r7   r   r   r	   zbnid,bijd->bnijr	   )r   r   r   r   r#   r=   r   matmulr]   rd   r;   r/   r}   rj   r<   r   rq   tori   einsumshaper   mathsqrtr   r   Softmaxry   
contiguousr   )r1   r   rU   r   r   r   r   r   r   mixed_query_layeris_cross_attention	key_layervalue_layerquery_layerattention_scoresr   position_ids_lposition_ids_rdistancepositional_embeddingrelative_position_scoresrelative_position_scores_queryrelative_position_scores_key
batch_sizen_headd_headbbox_pos_scoresattention_probscontext_layernew_context_layer_shapeoutputsr&   r&   r'   rF   A  sn   









zBrosSelfAttention.forwardNNNNNF)r   r    r!   r-   r#   rH   r   r   r   r$   rF   rI   r&   r&   r3   r'   r      s8    	
r   c                       8   e Zd Z fddZdejdejdejfddZ  ZS )BrosSelfOutputc                    sB   t    t|j|j| _tj|j|jd| _t|j	| _
d S Nrb   )r,   r-   r   rY   rn   denseru   rv   rw   rx   ry   rO   r3   r&   r'   r-        
zBrosSelfOutput.__init__r   input_tensorr6   c                 C   &   |  |}| |}| || }|S rK   r   ry   ru   r1   r   r   r&   r&   r'   rF        

zBrosSelfOutput.forwardrG   r&   r&   r3   r'   r         $r   c                       s   e Zd Z fddZdd Z						ddejdejd	eej d
eej deej deej deeeej	   dee
 deej fddZ  ZS )BrosAttentionc                    s*   t    t|| _t|| _t | _d S rK   )r,   r-   r   r1   r   outputsetpruned_headsrO   r3   r&   r'   r-     s   


zBrosAttention.__init__c                 C   s   t |dkrd S t|| jj| jj| j\}}t| jj|| j_t| jj|| j_t| jj	|| j_	t| j
j|dd| j
_| jjt | | j_| jj| jj | j_| j|| _d S )Nr   r   r9   )lenr   r1   r   r   r   r   r   r   r   r   r   r   union)r1   headsindexr&   r&   r'   prune_heads  s   zBrosAttention.prune_headsNFr   rU   r   r   r   r   r   r   r6   c	              
   C   s@   | j ||||||||d}	| |	d |}
|
f|	dd   }|S )Nr   rU   r   r   r   r   r   r   r   r   )r1   r   )r1   r   rU   r   r   r   r   r   r   self_outputsattention_outputr   r&   r&   r'   rF     s   
zBrosAttention.forwardr   )r   r    r!   r-   r   r#   rH   r   r   r$   boolrF   rI   r&   r&   r3   r'   r     s8    	
r   c                       r(   )BrosIntermediatec                    sD   t    t|j|j| _t|jt	rt
|j | _d S |j| _d S rK   )r,   r-   r   rY   rn   intermediate_sizer   
isinstance
hidden_actstrr
   intermediate_act_fnrO   r3   r&   r'   r-     s
   
zBrosIntermediate.__init__r   r6   c                 C   s   |  |}| |}|S rK   )r   r   )r1   r   r&   r&   r'   rF     s   

zBrosIntermediate.forwardrG   r&   r&   r3   r'   r     s    r   c                       r   )
BrosOutputc                    sB   t    t|j|j| _tj|j|jd| _t	|j
| _d S r   )r,   r-   r   rY   r   rn   r   ru   rv   rw   rx   ry   rO   r3   r&   r'   r-      r   zBrosOutput.__init__r   r   r6   c                 C   r   rK   r   r   r&   r&   r'   rF     r   zBrosOutput.forwardrG   r&   r&   r3   r'   r     r   r   c                       s   e Zd Z fddZ						ddejdejdeej deej d	eej d
eej deeeej   dee	 deej fddZ
dd Z  ZS )	BrosLayerc                    sn   t    |j| _d| _t|| _|j| _|j| _| jr+| js&t|  dt|| _	t
|| _t|| _d S )Nr   z> should be used as a decoder model if cross attention is added)r,   r-   chunk_size_feed_forwardseq_len_dimr   	attentionr   add_cross_attention	Exceptioncrossattentionr   intermediater   r   rO   r3   r&   r'   r-     s   



zBrosLayer.__init__NFr   rU   r   r   r   r   r   r   r6   c	              	   C   s  |d ur
|d d nd }	| j ||||||	d}
|
d }| jr)|
dd }|
d }n|
dd  }d }| jrp|d urpt| drEtd|  d|d urO|d	d  nd }| |||||||}|d }||dd  }|d }|| }t| j| j| j|}|f| }| jr||f }|S )
Nr7   )rU   r   r   r   r   r   r   r8   r   z'If `encoder_hidden_states` are passed, z` has to be instantiated with cross-attention layers by setting `config.add_cross_attention=True`r   )	r   r   r   r   r   r   feed_forward_chunkr   r   )r1   r   rU   r   r   r   r   r   r   self_attn_past_key_valueself_attention_outputsr   r   present_key_valuecross_attn_present_key_valuecross_attn_past_key_valuecross_attention_outputslayer_outputr&   r&   r'   rF     sX   


	

zBrosLayer.forwardc                 C   s   |  |}| ||}|S rK   )r   r   )r1   r   intermediate_outputr   r&   r&   r'   r   a  s   
zBrosLayer.feed_forward_chunkr   )r   r    r!   r-   r#   rH   r   r$   r   r   rF   r   rI   r&   r&   r3   r'   r     s8    	

Er   c                       s   e Zd Z fddZ									ddejdejdeej d	eej d
eej deej deeeej   dee	 dee	 dee	 dee	 de
eej ef fddZ  ZS )BrosEncoderc                    s4   t     | _t fddt jD | _d S )Nc                    s   g | ]}t  qS r&   )r   ).0_r2   r&   r'   
<listcomp>k  s    z(BrosEncoder.__init__.<locals>.<listcomp>)r,   r-   r2   r   
ModuleListrQ   num_hidden_layerslayerrO   r3   r   r'   r-   h  s   
$zBrosEncoder.__init__NFTr   rU   r   r   r   r   past_key_values	use_cacher   output_hidden_statesreturn_dictr6   c                 C   s^  |
rdnd }|	r
dnd }|	r| j jrdnd }|rdnd }t| jD ]m\}}|
r,||f }|d ur4|| nd }|d ur>|| nd }t| j ddra| jra|rStd d}| |j	|||||||	}n|||||||||	d}|d }|rz||d f7 }|	r||d f }| j jr||d	 f }q!|
r||f }|st
d
d |||||fD S t|||||dS )Nr&   gradient_checkpointingFzh`use_cache=True` is incompatible with `config.gradient_checkpointing=True`. Setting `use_cache=False`...r   r   r8   r   r7   c                 s   s    | ]	}|d ur|V  qd S rK   r&   )r   vr&   r&   r'   	<genexpr>  s    z&BrosEncoder.forward.<locals>.<genexpr>)last_hidden_stater   r   r   cross_attentions)r2   r   	enumerater   rz   trainingloggerwarning_gradient_checkpointing_func__call__tupler   )r1   r   rU   r   r   r   r   r   r   r   r   r   all_hidden_statesall_self_attentionsall_cross_attentionsnext_decoder_cacherT   layer_modulelayer_head_maskr   layer_outputsr&   r&   r'   rF   m  sz   

zBrosEncoder.forward)	NNNNNNFFT)r   r    r!   r-   r#   rH   r   r$   r   r   r   r   rF   rI   r&   r&   r3   r'   r   g  sH    		
r   c                       r(   )
BrosPoolerc                    s*   t    t|j|j| _t | _d S rK   )r,   r-   r   rY   rn   r   Tanh
activationrO   r3   r&   r'   r-     s   
zBrosPooler.__init__r   r6   c                 C   s(   |d d df }|  |}| |}|S )Nr   )r   r  )r1   r   first_token_tensorpooled_outputr&   r&   r'   rF     s   

zBrosPooler.forwardrG   r&   r&   r3   r'   r    s    r  c                       r(   )BrosRelationExtractorc                    s   t    |j| _|j| _|j| _|j| _t| j| _	t
| j| j| j | _t
| j| j| j | _ttd| j| _d S )Nr   )r,   r-   n_relationsrn   backbone_hidden_sizehead_hidden_sizeclassifier_dropout_probr   rw   droprY   r   r   	Parameterr#   r|   
dummy_noderO   r3   r&   r'   r-     s   
zBrosRelationExtractor.__init__r   r   c              	   C   s   |  | |}| jdd|dd}tj||gdd}| | |}|	|d|d| j
| j}|	|d|d| j
| j}t|dddd|dddd}|S )Nr   r   axisr7   r	   )r   r  r  	unsqueezerepeatr;   r#   r=   r   r<   r  r  r   r   )r1   r   r   	dummy_vecrelation_scorer&   r&   r'   rF     s    zBrosRelationExtractor.forwardrG   r&   r&   r3   r'   r    s    r  c                   @   s    e Zd ZdZeZdZdd ZdS )BrosPreTrainedModelz
    An abstract class to handle weights initialization and a simple interface for downloading and loading pretrained
    models.
    brosc                 C   s   t |tjr |jjjd| jjd |jdur|jj	  dS dS t |tj
rC|jjjd| jjd |jdurA|jj|j 	  dS dS t |tjrX|jj	  |jjd dS dS )zInitialize the weightsr*   )meanstdNg      ?)r   r   rY   weightdatanormal_r2   initializer_rangerW   zero_rl   ra   ru   fill_)r1   moduler&   r&   r'   _init_weights  s   

z!BrosPreTrainedModel._init_weightsN)r   r    r!   r"   r   config_classbase_model_prefixr.  r&   r&   r&   r'   r#    s
    r#  z^The bare Bros Model transformer outputting raw hidden-states without any specific head on top.c                #       s  e Zd Zd fdd	Zdd Zdd Zdd	 Zee	d
e
eed														ddeej deej deej deej deej deej deej deej deej deeej  dee dee dee dee deeej ef fddZ  ZS ) 	BrosModelTc                    sN   t  | || _t|| _t|| _t|| _|rt	|nd | _
|   d S rK   )r,   r-   r2   r`   r   rV   bbox_embeddingsr   encoderr  poolerinit_weights)r1   r2   add_pooling_layerr3   r&   r'   r-     s   


zBrosModel.__init__c                 C   s   | j jS rK   r   rp   )r1   r&   r&   r'   get_input_embeddings!  s   zBrosModel.get_input_embeddingsc                 C   s   || j _d S rK   r7  )r1   r   r&   r&   r'   set_input_embeddings$  s   zBrosModel.set_input_embeddingsc                 C   s*   |  D ]\}}| jj| j| qdS )z
        Prunes heads of the model. heads_to_prune: dict of {layer_num: list of heads to prune in this layer} See base
        class PreTrainedModel
        N)itemsr3  r   r   r   )r1   heads_to_pruner   r   r&   r&   r'   _prune_heads'  s   zBrosModel._prune_headsbatch_size, sequence_lengthoutput_typer/  Nr~   rP   r   rg   rf   r   r   r   r   r   r   r   r   r   r6   c           "      C   s  |dur|n| j j}|dur|n| j j}|dur|n| j j}| j jr-|dur(|n| j j}nd}|dur;|dur;td|durD| }n|durQ| dd }ntd|du r]td|\}}|durh|jn|j}|
durx|
d d j	d nd}|du rt
j||d	}|du rt| jd
r| jjddd|f }|||}|}n	t
j|t
j|d}| |||}| j jr|dur| \}}}||f}|	du rt
j||d	}	| |	}nd}| || j j}| j|||||d}|j	d dkr|ddddg df }|| j j }| |}| j|||||||
||||d}|d } | jdur+| | nd}!|s:| |!f|dd  S t| |!|j|j|j|jdS )a  
        Returns:

        Examples:

        ```python
        >>> import torch
        >>> from transformers import BrosProcessor, BrosModel

        >>> processor = BrosProcessor.from_pretrained("jinho8345/bros-base-uncased")

        >>> model = BrosModel.from_pretrained("jinho8345/bros-base-uncased")

        >>> encoding = processor("Hello, my dog is cute", add_special_tokens=False, return_tensors="pt")
        >>> bbox = torch.tensor([[[0, 0, 1, 1]]]).repeat(1, encoding["input_ids"].shape[-1], 1)
        >>> encoding["bbox"] = bbox

        >>> outputs = model(**encoding)
        >>> last_hidden_states = outputs.last_hidden_state
        ```NFzDYou cannot specify both input_ids and inputs_embeds at the same timer8   z5You have to specify either input_ids or inputs_embedszYou have to specify bboxr   r7   )rj   rg   rh   )r~   rf   rg   r   r      )r   r   r7   r   r7   r	   r   r	   )
rU   r   r   r   r   r   r   r   r   r   r   )r   pooler_outputr   r   r   r  )r2   r   r   use_return_dictr   r   r   r;   rj   r   r#   onesr   r   rg   r{   r|   r}   get_extended_attention_maskinvert_attention_maskget_head_maskr   
bbox_scaler2  r3  r4  r   r   r   r   r  )"r1   r~   rP   r   rg   rf   r   r   r   r   r   r   r   r   r   r   r   r   rj   r   r   r   extended_attention_maskencoder_batch_sizeencoder_sequence_lengthr   encoder_hidden_shapeencoder_extended_attention_maskembedding_outputscaled_bboxbbox_position_embeddingsencoder_outputssequence_outputr  r&   r&   r'   rF   /  s   '
	
zBrosModel.forward)T)NNNNNNNNNNNNNN)r   r    r!   r-   r8  r9  r<  r   BROS_INPUTS_DOCSTRINGformatr   r   _CONFIG_FOR_DOCr   r#   rH   r   r$   r   r   r   rF   rI   r&   r&   r3   r'   r1    sh    
	
r1  z
    Bros Model with a token classification head on top (a linear layer on top of the hidden-states output) e.g. for
    Named-Entity-Recognition (NER) tasks.
    c                          e Zd ZdgZ fddZeedee	e
d												ddeej deej d	eej d
eej deej deej deej deej deej dee dee dee deeej e	f fddZ  ZS )BrosForTokenClassificationr4  c                    s^   t  | |j| _t|| _t|dr|jn|j}t	|| _
t|j|j| _|   d S Nclassifier_dropout)r,   r-   
num_labelsr1  r$  r   rX  rx   r   rw   ry   rY   rn   
classifierr5  r1   r2   rX  r3   r&   r'   r-     s   
z#BrosForTokenClassification.__init__r=  r>  Nr~   rP   r   bbox_first_token_maskrg   rf   r   r   labelsr   r   r   r6   c                 C   s   |dur|n| j j}| j||||||||
||d
}|d }| |}| |}d}|	durXt }|durK|d}||d| j| |	d| }n||d| j|	d}|sn|f|dd  }|durl|f| S |S t|||j	|j
dS )ax  

        Returns:

        Examples:

        ```python
        >>> import torch
        >>> from transformers import BrosProcessor, BrosForTokenClassification

        >>> processor = BrosProcessor.from_pretrained("jinho8345/bros-base-uncased")

        >>> model = BrosForTokenClassification.from_pretrained("jinho8345/bros-base-uncased")

        >>> encoding = processor("Hello, my dog is cute", add_special_tokens=False, return_tensors="pt")
        >>> bbox = torch.tensor([[[0, 0, 1, 1]]]).repeat(1, encoding["input_ids"].shape[-1], 1)
        >>> encoding["bbox"] = bbox

        >>> outputs = model(**encoding)
        ```N)	rP   r   rg   rf   r   r   r   r   r   r   r8   r7   r   logitsr   r   )r2   rB  r$  ry   rZ  r   r<   rY  r   r   r   )r1   r~   rP   r   r\  rg   rf   r   r   r]  r   r   r   r   rQ  r_  r   loss_fctr   r&   r&   r'   rF     sD   &


z"BrosForTokenClassification.forwardNNNNNNNNNNNNr   r    r!   "_keys_to_ignore_on_load_unexpectedr-   r   rR  rS  r   r   rT  r   r#   rH   r   r   r   rF   rI   r&   r&   r3   r'   rV    sX    
	
rV  a  
    Bros Model with a token classification head on top (initial_token_layers and subsequent_token_layer on top of the
    hidden-states output) e.g. for Named-Entity-Recognition (NER) tasks. The initial_token_classifier is used to
    predict the first token of each entity, and the subsequent_token_classifier is used to predict the subsequent
    tokens within an entity. Compared to BrosForTokenClassification, this model is more robust to serialization errors
    since it predicts next token from one token.
    c                !       s   e Zd ZdgZ fddZeedee	e
d													ddeej deej d	eej d
eej deej deej deej deej deej deej dee dee dee deeej e	f fddZ  ZS )!BrosSpadeEEForTokenClassificationr4  c              	      s   t  | || _|j| _|j| _|j| _t|| _t	|dr"|j
n|j}tt|t|j|jt|t|j|j| _t|| _|   d S rW  )r,   r-   r2   rY  r  rn   r  r1  r$  r   rX  rx   r   
Sequentialrw   rY   initial_token_classifierr  subsequent_token_classifierr5  r[  r3   r&   r'   r-   4  s    

z*BrosSpadeEEForTokenClassification.__init__r=  r>  Nr~   rP   r   r\  rg   rf   r   r   initial_token_labelssubsequent_token_labelsr   r   r   r6   c                 C   s  |dur|n| j j}| j||||||||||d
}|d }|dd }| |dd }| ||d}d| }|j\}}|j	}t
j|t
|dg|gdd }||dddddf t
|jj}t
||d | }||dddddf t
|jj}|d }d}|	dur|
durt }|	d}	|dur|d}||d| j| |	| }n
||d| j|	}|
d}
||d|d | |
| }|| }|s||f|dd  }|dur|f| S |S t||||j|jdS )	a  
        Returns:

        Examples:

        ```python
        >>> import torch
        >>> from transformers import BrosProcessor, BrosSpadeEEForTokenClassification

        >>> processor = BrosProcessor.from_pretrained("jinho8345/bros-base-uncased")

        >>> model = BrosSpadeEEForTokenClassification.from_pretrained("jinho8345/bros-base-uncased")

        >>> encoding = processor("Hello, my dog is cute", add_special_tokens=False, return_tensors="pt")
        >>> bbox = torch.tensor([[[0, 0, 1, 1]]]).repeat(1, encoding["input_ids"].shape[-1], 1)
        >>> encoding["bbox"] = bbox

        >>> outputs = model(**encoding)
        ```N
r~   rP   r   rg   rf   r   r   r   r   r   r   r   r  r8   r7   )r   r   r   r   r   )r2   rB  r$  r]   r   rf  rg  squeezer   rj   r#   r=   r|   r   r   masked_fillfinfori   mineyer<   r   rY  r   r   r   )r1   r~   rP   r   r\  rg   rf   r   r   rh  ri  r   r   r   r   last_hidden_statesr   r   inv_attention_maskr   max_seq_lengthrj   invalid_token_maskself_token_masksubsequent_token_maskr   r`  initial_token_losssubsequent_token_lossr   r&   r&   r'   rF   M  sp   &
&  


z)BrosSpadeEEForTokenClassification.forward)NNNNNNNNNNNNN)r   r    r!   rc  r-   r   rR  rS  r   r   rT  r   r#   rH   r   r   r   rF   rI   r&   r&   r3   r'   rd  '  s^    
	
rd  z
    Bros Model with a token classification head on top (a entity_linker layer on top of the hidden-states output) e.g.
    for Entity-Linking. The entity_linker is used to predict intra-entity links (one entity to another entity).
    c                       rU  )!BrosSpadeELForTokenClassificationr4  c                    sx   t  | || _|j| _|j| _|j| _t|| _t	|dr"|j
n|j t|| _|   d S  t|| _|   d S rW  )r,   r-   r2   rY  r  rn   r  r1  r$  r   rX  rx   r  entity_linkerr5  rO   r3   r&   r'   r-     s   


z*BrosSpadeELForTokenClassification.__init__r=  r>  Nr~   rP   r   r\  rg   rf   r   r   r]  r   r   r   r6   c                 C   s  |dur|n| j j}| j||||||||
||d
}|d }|dd }| ||d}d}|	durt }|j\}}|j	}t
||d | }|d}t
j| t
j|dgt
jd|gdd}||dddddf t
|jj}||dddddf t
|jj}||d|d | |	d| }|s|f|dd  }|dur|f| S |S t|||j|jd	S )
a  
        Returns:

        Examples:

        ```python
        >>> import torch
        >>> from transformers import BrosProcessor, BrosSpadeELForTokenClassification

        >>> processor = BrosProcessor.from_pretrained("jinho8345/bros-base-uncased")

        >>> model = BrosSpadeELForTokenClassification.from_pretrained("jinho8345/bros-base-uncased")

        >>> encoding = processor("Hello, my dog is cute", add_special_tokens=False, return_tensors="pt")
        >>> bbox = torch.tensor([[[0, 0, 1, 1]]]).repeat(1, encoding["input_ids"].shape[-1], 1)
        >>> encoding["bbox"] = bbox

        >>> outputs = model(**encoding)
        ```Nrj  r   r   r8   r   r  r7   r^  )r2   rB  r$  r]   r   ry  rk  r   r   rj   r#   ro  r   r   r<   r=   r|   rl  rm  ri   rn  r   r   r   )r1   r~   rP   r   r\  rg   rf   r   r   r]  r   r   r   r   rp  r_  r   r`  r   rr  rj   rt  maskr   r&   r&   r'   rF     sR   $

(($z)BrosSpadeELForTokenClassification.forwardra  rb  r&   r&   r3   r'   rx    sX    
	
rx  )>r"   r   dataclassesr   typingr   r   r   r   r#   torch.utils.checkpointr   torch.nnr   activationsr
   modeling_outputsr   r   r   modeling_utilsr   pytorch_utilsr   r   r   utilsr   r   r   r   r   configuration_brosr   
get_loggerr   r  _CHECKPOINT_FOR_DOCrT  "BROS_PRETRAINED_MODEL_ARCHIVE_LISTBROS_START_DOCSTRINGrR  r   Moduler)   rJ   rV   r`   r   r   r   r   r   r   r   r  r  r#  r1  rV  rd  rx  r&   r&   r&   r'   <module>   sz   
DB 7Z^" ,a
 	