""" PyTorch MarkupLM model."""

import math
import os
from typing import Optional, Tuple, Union

import torch
import torch.utils.checkpoint
from torch import nn
from torch.nn import BCEWithLogitsLoss, CrossEntropyLoss, MSELoss

from ...activations import ACT2FN
from ...file_utils import (
    add_start_docstrings,
    add_start_docstrings_to_model_forward,
    replace_return_docstrings,
)
from ...modeling_outputs import (
    BaseModelOutputWithPastAndCrossAttentions,
    BaseModelOutputWithPoolingAndCrossAttentions,
    MaskedLMOutput,
    QuestionAnsweringModelOutput,
    SequenceClassifierOutput,
    TokenClassifierOutput,
)
from ...modeling_utils import (
    PreTrainedModel,
    apply_chunking_to_forward,
    find_pruneable_heads_and_indices,
    prune_linear_layer,
)
from ...utils import logging
from .configuration_markuplm import MarkupLMConfig


logger = logging.get_logger(__name__)

_CHECKPOINT_FOR_DOC = "microsoft/markuplm-base"
_CONFIG_FOR_DOC = "MarkupLMConfig"

MARKUPLM_PRETRAINED_MODEL_ARCHIVE_LIST = [
    "microsoft/markuplm-base",
    "microsoft/markuplm-large",
]


class XPathEmbeddings(nn.Module):
    """Construct the embeddings from xpath tags and subscripts.

    We drop tree-id in this version, as its info can be covered by xpath.
    """
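    # `xpath_tags_seq` and `xpath_subs_seq` are expected as LongTensors of shape
    # (batch_size, seq_len, config.max_depth): one (tag id, subscript id) pair per token and
    # per depth level of its xpath. Each depth level has its own pair of embedding tables of
    # width `config.xpath_unit_hidden_size`; the per-depth embeddings are concatenated and
    # projected to `config.hidden_size` by a small ReLU MLP (`xpath_unitseq2_inner` followed
    # by `inner2emb`) in `forward` below.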
    def __init__(self, config):
        super(XPathEmbeddings, self).__init__()
        self.max_depth = config.max_depth

        self.xpath_unitseq2_embeddings = nn.Linear(config.xpath_unit_hidden_size * self.max_depth, config.hidden_size)

        self.dropout = nn.Dropout(config.hidden_dropout_prob)

        self.activation = nn.ReLU()
        self.xpath_unitseq2_inner = nn.Linear(config.xpath_unit_hidden_size * self.max_depth, 4 * config.hidden_size)
        self.inner2emb = nn.Linear(4 * config.hidden_size, config.hidden_size)

        self.xpath_tag_sub_embeddings = nn.ModuleList(
            [
                nn.Embedding(config.max_xpath_tag_unit_embeddings, config.xpath_unit_hidden_size)
                for _ in range(self.max_depth)
            ]
        )

        self.xpath_subs_sub_embeddings = nn.ModuleList(
            [
                nn.Embedding(config.max_xpath_subs_unit_embeddings, config.xpath_unit_hidden_size)
                for _ in range(self.max_depth)
            ]
        )

    def forward(self, xpath_tags_seq=None, xpath_subs_seq=None):
        xpath_tags_embeddings = []
        xpath_subs_embeddings = []

        for i in range(self.max_depth):
            xpath_tags_embeddings.append(self.xpath_tag_sub_embeddings[i](xpath_tags_seq[:, :, i]))
            xpath_subs_embeddings.append(self.xpath_subs_sub_embeddings[i](xpath_subs_seq[:, :, i]))

        xpath_tags_embeddings = torch.cat(xpath_tags_embeddings, dim=-1)
        xpath_subs_embeddings = torch.cat(xpath_subs_embeddings, dim=-1)

        xpath_embeddings = xpath_tags_embeddings + xpath_subs_embeddings

        xpath_embeddings = self.inner2emb(self.dropout(self.activation(self.xpath_unitseq2_inner(xpath_embeddings))))

        return xpath_embeddings


def create_position_ids_from_input_ids(input_ids, padding_idx, past_key_values_length=0):
    """
    Replace non-padding symbols with their position numbers. Position numbers begin at padding_idx+1. Padding symbols
    are ignored. This is modified from fairseq's `utils.make_positions`.

    Args:
        x: torch.Tensor x:

    Returns: torch.Tensor
    """
    # The series of casts and type-conversions here are carefully balanced to work with ONNX export and XLA.
    mask = input_ids.ne(padding_idx).int()
    incremental_indices = (torch.cumsum(mask, dim=1).type_as(mask) + past_key_values_length) * mask
    return incremental_indices.long() + padding_idx


class MarkupLMEmbeddings(nn.Module):
    """Construct the embeddings from word, position and token_type embeddings."""

    def __init__(self, config):
        super(MarkupLMEmbeddings, self).__init__()
        self.config = config
        self.word_embeddings = nn.Embedding(config.vocab_size, config.hidden_size, padding_idx=config.pad_token_id)
        self.position_embeddings = nn.Embedding(config.max_position_embeddings, config.hidden_size)

        self.max_depth = config.max_depth
        self.xpath_embeddings = XPathEmbeddings(config)

        self.token_type_embeddings = nn.Embedding(config.type_vocab_size, config.hidden_size)

        self.LayerNorm = nn.LayerNorm(config.hidden_size, eps=config.layer_norm_eps)
        self.dropout = nn.Dropout(config.hidden_dropout_prob)

        self.register_buffer(
            "position_ids", torch.arange(config.max_position_embeddings).expand((1, -1)), persistent=False
        )

        self.padding_idx = config.pad_token_id
        self.position_embeddings = nn.Embedding(
            config.max_position_embeddings, config.hidden_size, padding_idx=self.padding_idx
        )

    def create_position_ids_from_inputs_embeds(self, inputs_embeds):
        """
        We are provided embeddings directly. We cannot infer which are padded so just generate sequential position ids.

        Args:
            inputs_embeds: torch.Tensor

        Returns: torch.Tensor
        """
        input_shape = inputs_embeds.size()[:-1]
        sequence_length = input_shape[1]

        position_ids = torch.arange(
            self.padding_idx + 1, sequence_length + self.padding_idx + 1, dtype=torch.long, device=inputs_embeds.device
        )
        return position_ids.unsqueeze(0).expand(input_shape)

    def forward(
        self,
        input_ids=None,
        xpath_tags_seq=None,
        xpath_subs_seq=None,
        token_type_ids=None,
        position_ids=None,
        inputs_embeds=None,
        past_key_values_length=0,
    ):
        if input_ids is not None:
            input_shape = input_ids.size()
        else:
            input_shape = inputs_embeds.size()[:-1]

        device = input_ids.device if input_ids is not None else inputs_embeds.device

        if position_ids is None:
            if input_ids is not None:
                # Create the position ids from the input token ids. Any padded tokens remain padded.
                position_ids = create_position_ids_from_input_ids(input_ids, self.padding_idx, past_key_values_length)
            else:
                position_ids = self.create_position_ids_from_inputs_embeds(inputs_embeds)

        if token_type_ids is None:
            token_type_ids = torch.zeros(input_shape, dtype=torch.long, device=device)

        if inputs_embeds is None:
            inputs_embeds = self.word_embeddings(input_ids)

        # prepare xpath seq: fall back to fully padded xpath inputs when they are not provided
        if xpath_tags_seq is None:
            xpath_tags_seq = self.config.tag_pad_id * torch.ones(
                tuple(list(input_shape) + [self.max_depth]), dtype=torch.long, device=device
            )
        if xpath_subs_seq is None:
            xpath_subs_seq = self.config.subs_pad_id * torch.ones(
                tuple(list(input_shape) + [self.max_depth]), dtype=torch.long, device=device
            )

        words_embeddings = inputs_embeds
        position_embeddings = self.position_embeddings(position_ids)
        token_type_embeddings = self.token_type_embeddings(token_type_ids)
        xpath_embeddings = self.xpath_embeddings(xpath_tags_seq, xpath_subs_seq)

        embeddings = words_embeddings + position_embeddings + token_type_embeddings + xpath_embeddings
        embeddings = self.LayerNorm(embeddings)
        embeddings = self.dropout(embeddings)
        return embeddings


class MarkupLMSelfOutput(nn.Module):
    def __init__(self, config):
        super().__init__()
        self.dense = nn.Linear(config.hidden_size, config.hidden_size)
        self.LayerNorm = nn.LayerNorm(config.hidden_size, eps=config.layer_norm_eps)
        self.dropout = nn.Dropout(config.hidden_dropout_prob)

    def forward(self, hidden_states: torch.Tensor, input_tensor: torch.Tensor) -> torch.Tensor:
        hidden_states = self.dense(hidden_states)
        hidden_states = self.dropout(hidden_states)
        hidden_states = self.LayerNorm(hidden_states + input_tensor)
        return hidden_states


class MarkupLMIntermediate(nn.Module):
    def __init__(self, config):
        super().__init__()
        self.dense = nn.Linear(config.hidden_size, config.intermediate_size)
        if isinstance(config.hidden_act, str):
            self.intermediate_act_fn = ACT2FN[config.hidden_act]
        else:
            self.intermediate_act_fn = config.hidden_act

    def forward(self, hidden_states: torch.Tensor) -> torch.Tensor:
        hidden_states = self.dense(hidden_states)
        hidden_states = self.intermediate_act_fn(hidden_states)
        return hidden_states


class MarkupLMOutput(nn.Module):
    def __init__(self, config):
        super().__init__()
        self.dense = nn.Linear(config.intermediate_size, config.hidden_size)
        self.LayerNorm = nn.LayerNorm(config.hidden_size, eps=config.layer_norm_eps)
        self.dropout = nn.Dropout(config.hidden_dropout_prob)

    def forward(self, hidden_states: torch.Tensor, input_tensor: torch.Tensor) -> torch.Tensor:
        hidden_states = self.dense(hidden_states)
        hidden_states = self.dropout(hidden_states)
        hidden_states = self.LayerNorm(hidden_states + input_tensor)
        return hidden_states


class MarkupLMPooler(nn.Module):
    def __init__(self, config):
        super().__init__()
        self.dense = nn.Linear(config.hidden_size, config.hidden_size)
        self.activation = nn.Tanh()

    def forward(self, hidden_states: torch.Tensor) -> torch.Tensor:
        # "Pool" the model by taking the hidden state corresponding to the first token.
        first_token_tensor = hidden_states[:, 0]
        pooled_output = self.dense(first_token_tensor)
        pooled_output = self.activation(pooled_output)
        return pooled_output


class MarkupLMPredictionHeadTransform(nn.Module):
    def __init__(self, config):
        super().__init__()
        self.dense = nn.Linear(config.hidden_size, config.hidden_size)
        if isinstance(config.hidden_act, str):
            self.transform_act_fn = ACT2FN[config.hidden_act]
        else:
            self.transform_act_fn = config.hidden_act
        self.LayerNorm = nn.LayerNorm(config.hidden_size, eps=config.layer_norm_eps)

    def forward(self, hidden_states: torch.Tensor) -> torch.Tensor:
        hidden_states = self.dense(hidden_states)
        hidden_states = self.transform_act_fn(hidden_states)
        hidden_states = self.LayerNorm(hidden_states)
        return hidden_states


class MarkupLMLMPredictionHead(nn.Module):
    def __init__(self, config):
        super().__init__()
        self.transform = MarkupLMPredictionHeadTransform(config)

        # The output weights are the same as the input embeddings, but there is an output-only bias for each token.
        self.decoder = nn.Linear(config.hidden_size, config.vocab_size, bias=False)
        self.bias = nn.Parameter(torch.zeros(config.vocab_size))

        # Link the two variables so that the bias is correctly resized with `resize_token_embeddings`.
        self.decoder.bias = self.bias

    def forward(self, hidden_states):
        hidden_states = self.transform(hidden_states)
        hidden_states = self.decoder(hidden_states)
        return hidden_states


class MarkupLMOnlyMLMHead(nn.Module):
    def __init__(self, config):
        super().__init__()
        self.predictions = MarkupLMLMPredictionHead(config)

    def forward(self, sequence_output: torch.Tensor) -> torch.Tensor:
        prediction_scores = self.predictions(sequence_output)
        return prediction_scores


class MarkupLMSelfAttention(nn.Module):
    def __init__(self, config, position_embedding_type=None):
        super().__init__()
        if config.hidden_size % config.num_attention_heads != 0 and not hasattr(config, "embedding_size"):
            raise ValueError(
                f"The hidden size ({config.hidden_size}) is not a multiple of the number of attention heads "
                f"({config.num_attention_heads})"
            )

        self.num_attention_heads = config.num_attention_heads
        self.attention_head_size = int(config.hidden_size / config.num_attention_heads)
        self.all_head_size = self.num_attention_heads * self.attention_head_size

        self.query = nn.Linear(config.hidden_size, self.all_head_size)
        self.key = nn.Linear(config.hidden_size, self.all_head_size)
        self.value = nn.Linear(config.hidden_size, self.all_head_size)

        self.dropout = nn.Dropout(config.attention_probs_dropout_prob)
        self.position_embedding_type = position_embedding_type or getattr(
            config, "position_embedding_type", "absolute"
        )
        if self.position_embedding_type == "relative_key" or self.position_embedding_type == "relative_key_query":
            self.max_position_embeddings = config.max_position_embeddings
            self.distance_embedding = nn.Embedding(2 * config.max_position_embeddings - 1, self.attention_head_size)

        self.is_decoder = config.is_decoder

    def transpose_for_scores(self, x: torch.Tensor) -> torch.Tensor:
        new_x_shape = x.size()[:-1] + (self.num_attention_heads, self.attention_head_size)
        x = x.view(new_x_shape)
        return x.permute(0, 2, 1, 3)

    def forward(
        self,
        hidden_states: torch.Tensor,
        attention_mask: Optional[torch.FloatTensor] = None,
        head_mask: Optional[torch.FloatTensor] = None,
        encoder_hidden_states: Optional[torch.FloatTensor] = None,
        encoder_attention_mask: Optional[torch.FloatTensor] = None,
        past_key_value: Optional[Tuple[Tuple[torch.FloatTensor]]] = None,
        output_attentions: Optional[bool] = False,
    ) -> Tuple[torch.Tensor]:
        mixed_query_layer = self.query(hidden_states)

        # If this is instantiated as a cross-attention module, the keys and values come from an encoder;
        # the attention mask needs to be such that the encoder's padding tokens are not attended to.
        is_cross_attention = encoder_hidden_states is not None

        if is_cross_attention and past_key_value is not None:
            # reuse k, v from cross-attention cache
            key_layer = past_key_value[0]
            value_layer = past_key_value[1]
            attention_mask = encoder_attention_mask
        elif is_cross_attention:
            key_layer = self.transpose_for_scores(self.key(encoder_hidden_states))
            value_layer = self.transpose_for_scores(self.value(encoder_hidden_states))
            attention_mask = encoder_attention_mask
        elif past_key_value is not None:
            key_layer = self.transpose_for_scores(self.key(hidden_states))
            value_layer = self.transpose_for_scores(self.value(hidden_states))
            key_layer = torch.cat([past_key_value[0], key_layer], dim=2)
            value_layer = torch.cat([past_key_value[1], value_layer], dim=2)
        else:
            key_layer = self.transpose_for_scores(self.key(hidden_states))
            value_layer = self.transpose_for_scores(self.value(hidden_states))

        query_layer = self.transpose_for_scores(mixed_query_layer)

        use_cache = past_key_value is not None
        if self.is_decoder:
            # save the key/value states so they can be reused at the next decoding step
            past_key_value = (key_layer, value_layer)

        # Take the dot product between "query" and "key" to get the raw attention scores.
        attention_scores = torch.matmul(query_layer, key_layer.transpose(-1, -2))

        if self.position_embedding_type == "relative_key" or self.position_embedding_type == "relative_key_query":
            query_length, key_length = query_layer.shape[2], key_layer.shape[2]
            if use_cache:
                position_ids_l = torch.tensor(key_length - 1, dtype=torch.long, device=hidden_states.device).view(
                    -1, 1
                )
            else:
                position_ids_l = torch.arange(query_length, dtype=torch.long, device=hidden_states.device).view(-1, 1)
            position_ids_r = torch.arange(key_length, dtype=torch.long, device=hidden_states.device).view(1, -1)
            distance = position_ids_l - position_ids_r

            positional_embedding = self.distance_embedding(distance + self.max_position_embeddings - 1)
            positional_embedding = positional_embedding.to(dtype=query_layer.dtype)  # fp16 compatibility

            if self.position_embedding_type == "relative_key":
                relative_position_scores = torch.einsum("bhld,lrd->bhlr", query_layer, positional_embedding)
                attention_scores = attention_scores + relative_position_scores
            elif self.position_embedding_type == "relative_key_query":
                relative_position_scores_query = torch.einsum("bhld,lrd->bhlr", query_layer, positional_embedding)
                relative_position_scores_key = torch.einsum("bhrd,lrd->bhlr", key_layer, positional_embedding)
                attention_scores = attention_scores + relative_position_scores_query + relative_position_scores_key

        attention_scores = attention_scores / math.sqrt(self.attention_head_size)
        if attention_mask is not None:
            # Apply the attention mask (precomputed for all layers in the MarkupLMModel forward() function).
            attention_scores = attention_scores + attention_mask

        # Normalize the attention scores to probabilities.
        attention_probs = nn.functional.softmax(attention_scores, dim=-1)

        # This drops out entire tokens to attend to, as in the original Transformer paper.
        attention_probs = self.dropout(attention_probs)

        # Mask heads if requested.
        if head_mask is not None:
            attention_probs = attention_probs * head_mask

        context_layer = torch.matmul(attention_probs, value_layer)

        context_layer = context_layer.permute(0, 2, 1, 3).contiguous()
        new_context_layer_shape = context_layer.size()[:-2] + (self.all_head_size,)
        context_layer = context_layer.view(new_context_layer_shape)

        outputs = (context_layer, attention_probs) if output_attentions else (context_layer,)

        if self.is_decoder:
            outputs = outputs + (past_key_value,)
        return outputs


class MarkupLMAttention(nn.Module):
    def __init__(self, config, position_embedding_type=None):
        super().__init__()
        self.self = MarkupLMSelfAttention(config, position_embedding_type=position_embedding_type)
        self.output = MarkupLMSelfOutput(config)
        self.pruned_heads = set()

    def prune_heads(self, heads):
        if len(heads) == 0:
            return
        heads, index = find_pruneable_heads_and_indices(
            heads, self.self.num_attention_heads, self.self.attention_head_size, self.pruned_heads
        )

        # Prune linear layers
        self.self.query = prune_linear_layer(self.self.query, index)
        self.self.key = prune_linear_layer(self.self.key, index)
        self.self.value = prune_linear_layer(self.self.value, index)
        self.output.dense = prune_linear_layer(self.output.dense, index, dim=1)

        # Update hyper params and store pruned heads
        self.self.num_attention_heads = self.self.num_attention_heads - len(heads)
        self.self.all_head_size = self.self.attention_head_size * self.self.num_attention_heads
        self.pruned_heads = self.pruned_heads.union(heads)

    def forward(
        self,
        hidden_states: torch.Tensor,
        attention_mask: Optional[torch.FloatTensor] = None,
        head_mask: Optional[torch.FloatTensor] = None,
        encoder_hidden_states: Optional[torch.FloatTensor] = None,
        encoder_attention_mask: Optional[torch.FloatTensor] = None,
        past_key_value: Optional[Tuple[Tuple[torch.FloatTensor]]] = None,
        output_attentions: Optional[bool] = False,
    ) -> Tuple[torch.Tensor]:
        self_outputs = self.self(
            hidden_states,
            attention_mask,
            head_mask,
            encoder_hidden_states,
            encoder_attention_mask,
            past_key_value,
            output_attentions,
        )
        attention_output = self.output(self_outputs[0], hidden_states)
        outputs = (attention_output,) + self_outputs[1:]  # add attentions if we output them
        return outputs


class MarkupLMLayer(nn.Module):
    def __init__(self, config):
        super().__init__()
        self.chunk_size_feed_forward = config.chunk_size_feed_forward
        self.seq_len_dim = 1
        self.attention = MarkupLMAttention(config)
        self.is_decoder = config.is_decoder
        self.add_cross_attention = config.add_cross_attention
        if self.add_cross_attention:
            if not self.is_decoder:
                raise ValueError(f"{self} should be used as a decoder model if cross attention is added")
            self.crossattention = MarkupLMAttention(config, position_embedding_type="absolute")
        self.intermediate = MarkupLMIntermediate(config)
        self.output = MarkupLMOutput(config)

    def forward(
        self,
        hidden_states: torch.Tensor,
        attention_mask: Optional[torch.FloatTensor] = None,
        head_mask: Optional[torch.FloatTensor] = None,
        encoder_hidden_states: Optional[torch.FloatTensor] = None,
        encoder_attention_mask: Optional[torch.FloatTensor] = None,
        past_key_value: Optional[Tuple[Tuple[torch.FloatTensor]]] = None,
        output_attentions: Optional[bool] = False,
    ) -> Tuple[torch.Tensor]:
        # decoder uni-directional self-attention cached key/values tuple is at positions 1, 2
        self_attn_past_key_value = past_key_value[:2] if past_key_value is not None else None
        self_attention_outputs = self.attention(
            hidden_states,
            attention_mask,
            head_mask,
            output_attentions=output_attentions,
            past_key_value=self_attn_past_key_value,
        )
        attention_output = self_attention_outputs[0]

        # if decoder, the last output is a tuple with the self-attention cache
        if self.is_decoder:
            outputs = self_attention_outputs[1:-1]
            present_key_value = self_attention_outputs[-1]
        else:
            outputs = self_attention_outputs[1:]  # add self attentions if we output attention weights

        cross_attn_present_key_value = None
        if self.is_decoder and encoder_hidden_states is not None:
            if not hasattr(self, "crossattention"):
                raise ValueError(
                    f"If `encoder_hidden_states` are passed, {self} has to be instantiated with cross-attention layers"
                    " by setting `config.add_cross_attention=True`"
                )

            # cross-attention cached key/values tuple is at positions 3, 4 of past_key_value tuple
            cross_attn_past_key_value = past_key_value[-2:] if past_key_value is not None else None
            cross_attention_outputs = self.crossattention(
                attention_output,
                attention_mask,
                head_mask,
                encoder_hidden_states,
                encoder_attention_mask,
                cross_attn_past_key_value,
                output_attentions,
            )
            attention_output = cross_attention_outputs[0]
            outputs = outputs + cross_attention_outputs[1:-1]  # add cross attentions if we output attention weights

            # add cross-attention cache to positions 3, 4 of the present_key_value tuple
            cross_attn_present_key_value = cross_attention_outputs[-1]
            present_key_value = present_key_value + cross_attn_present_key_value

        layer_output = apply_chunking_to_forward(
            self.feed_forward_chunk, self.chunk_size_feed_forward, self.seq_len_dim, attention_output
        )
        outputs = (layer_output,) + outputs

        # if decoder, return the attention key/values as the last output
        if self.is_decoder:
            outputs = outputs + (present_key_value,)

        return outputs

    def feed_forward_chunk(self, attention_output):
        intermediate_output = self.intermediate(attention_output)
        layer_output = self.output(intermediate_output, attention_output)
        return layer_output


class MarkupLMEncoder(nn.Module):
    def __init__(self, config):
        super().__init__()
        self.config = config
        self.layer = nn.ModuleList([MarkupLMLayer(config) for _ in range(config.num_hidden_layers)])
        self.gradient_checkpointing = False

    def forward(
        self,
        hidden_states: torch.Tensor,
        attention_mask: Optional[torch.FloatTensor] = None,
        head_mask: Optional[torch.FloatTensor] = None,
        encoder_hidden_states: Optional[torch.FloatTensor] = None,
        encoder_attention_mask: Optional[torch.FloatTensor] = None,
        past_key_values: Optional[Tuple[Tuple[torch.FloatTensor]]] = None,
        use_cache: Optional[bool] = None,
        output_attentions: Optional[bool] = False,
        output_hidden_states: Optional[bool] = False,
        return_dict: Optional[bool] = True,
    ) -> Union[Tuple[torch.Tensor], BaseModelOutputWithPastAndCrossAttentions]:
        all_hidden_states = () if output_hidden_states else None
        all_self_attentions = () if output_attentions else None
        all_cross_attentions = () if output_attentions and self.config.add_cross_attention else None

        if self.gradient_checkpointing and self.training:
            if use_cache:
                logger.warning_once(
                    "`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`..."
                )
                use_cache = False

        next_decoder_cache = () if use_cache else None
        for i, layer_module in enumerate(self.layer):
            if output_hidden_states:
                all_hidden_states = all_hidden_states + (hidden_states,)

            layer_head_mask = head_mask[i] if head_mask is not None else None
            past_key_value = past_key_values[i] if past_key_values is not None else None

            if self.gradient_checkpointing and self.training:
                layer_outputs = self._gradient_checkpointing_func(
                    layer_module.__call__,
                    hidden_states,
                    attention_mask,
                    layer_head_mask,
                    encoder_hidden_states,
                    encoder_attention_mask,
                    past_key_value,
                    output_attentions,
                )
            else:
                layer_outputs = layer_module(
                    hidden_states,
                    attention_mask,
                    layer_head_mask,
                    encoder_hidden_states,
                    encoder_attention_mask,
                    past_key_value,
                    output_attentions,
                )

            hidden_states = layer_outputs[0]
            if use_cache:
                next_decoder_cache += (layer_outputs[-1],)
            if output_attentions:
                all_self_attentions = all_self_attentions + (layer_outputs[1],)
                if self.config.add_cross_attention:
                    all_cross_attentions = all_cross_attentions + (layer_outputs[2],)

        if output_hidden_states:
            all_hidden_states = all_hidden_states + (hidden_states,)

        if not return_dict:
            return tuple(
                v
                for v in [
                    hidden_states,
                    next_decoder_cache,
                    all_hidden_states,
                    all_self_attentions,
                    all_cross_attentions,
                ]
                if v is not None
            )
        return BaseModelOutputWithPastAndCrossAttentions(
            last_hidden_state=hidden_states,
            past_key_values=next_decoder_cache,
            hidden_states=all_hidden_states,
            attentions=all_self_attentions,
            cross_attentions=all_cross_attentions,
        )


class MarkupLMPreTrainedModel(PreTrainedModel):
    """
    An abstract class to handle weights initialization and a simple interface for downloading and loading pretrained
    models.
    """

    config_class = MarkupLMConfig
    pretrained_model_archive_map = MARKUPLM_PRETRAINED_MODEL_ARCHIVE_LIST
    base_model_prefix = "markuplm"

    def _init_weights(self, module):
        """Initialize the weights"""
        if isinstance(module, nn.Linear):
            module.weight.data.normal_(mean=0.0, std=self.config.initializer_range)
            if module.bias is not None:
                module.bias.data.zero_()
        elif isinstance(module, nn.Embedding):
            module.weight.data.normal_(mean=0.0, std=self.config.initializer_range)
            if module.padding_idx is not None:
                module.weight.data[module.padding_idx].zero_()
        elif isinstance(module, nn.LayerNorm):
            module.bias.data.zero_()
            module.weight.data.fill_(1.0)

    @classmethod
    def from_pretrained(cls, pretrained_model_name_or_path: Optional[Union[str, os.PathLike]], *model_args, **kwargs):
        return super(MarkupLMPreTrainedModel, cls).from_pretrained(
            pretrained_model_name_or_path, *model_args, **kwargs
        )


MARKUPLM_START_DOCSTRING = r"""
    This model is a PyTorch [torch.nn.Module](https://pytorch.org/docs/stable/nn.html#torch.nn.Module) sub-class. Use
    it as a regular PyTorch Module and refer to the PyTorch documentation for all matter related to general usage and
    behavior.

    Parameters:
        config ([`MarkupLMConfig`]): Model configuration class with all the parameters of the model.
            Initializing with a config file does not load the weights associated with the model, only the
            configuration. Check out the [`~PreTrainedModel.from_pretrained`] method to load the model weights.
"""

MARKUPLM_INPUTS_DOCSTRING = r"""
    Args:
        input_ids (`torch.LongTensor` of shape `({0})`):
            Indices of input sequence tokens in the vocabulary.

            Indices can be obtained using [`AutoTokenizer`]. See [`PreTrainedTokenizer.encode`] and
            [`PreTrainedTokenizer.__call__`] for details.

            [What are input IDs?](../glossary#input-ids)

        xpath_tags_seq (`torch.LongTensor` of shape `({0}, config.max_depth)`, *optional*):
            Tag IDs for each token in the input sequence, padded up to config.max_depth.

        xpath_subs_seq (`torch.LongTensor` of shape `({0}, config.max_depth)`, *optional*):
            Subscript IDs for each token in the input sequence, padded up to config.max_depth.

        attention_mask (`torch.FloatTensor` of shape `({0})`, *optional*):
            Mask to avoid performing attention on padding token indices. Mask values selected in `[0, 1]`: `1` for
            tokens that are NOT MASKED, `0` for MASKED tokens.

            [What are attention masks?](../glossary#attention-mask)
        token_type_ids (`torch.LongTensor` of shape `({0})`, *optional*):
            Segment token indices to indicate first and second portions of the inputs. Indices are selected in `[0,
            1]`: `0` corresponds to a *sentence A* token, `1` corresponds to a *sentence B* token

            [What are token type IDs?](../glossary#token-type-ids)
        position_ids (`torch.LongTensor` of shape `({0})`, *optional*):
            Indices of positions of each input sequence tokens in the position embeddings. Selected in the range `[0,
            config.max_position_embeddings - 1]`.

            [What are position IDs?](../glossary#position-ids)
        head_mask (`torch.FloatTensor` of shape `(num_heads,)` or `(num_layers, num_heads)`, *optional*):
            Mask to nullify selected heads of the self-attention modules. Mask values selected in `[0, 1]`: `1`
            indicates the head is **not masked**, `0` indicates the head is **masked**.
        inputs_embeds (`torch.FloatTensor` of shape `(batch_size, sequence_length, hidden_size)`, *optional*):
            Optionally, instead of passing `input_ids` you can choose to directly pass an embedded representation. This
            is useful if you want more control over how to convert *input_ids* indices into associated vectors than the
            model's internal embedding lookup matrix.
        output_attentions (`bool`, *optional*):
            If set to `True`, the attentions tensors of all attention layers are returned. See `attentions` under
            returned tensors for more detail.
        output_hidden_states (`bool`, *optional*):
            If set to `True`, the hidden states of all layers are returned. See `hidden_states` under returned tensors
            for more detail.
        return_dict (`bool`, *optional*):
            If set to `True`, the model will return a [`~file_utils.ModelOutput`] instead of a plain tuple.
"""
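

# A minimal usage sketch for the xpath inputs documented above, kept as a comment so that importing
# this module stays side-effect free. `MarkupLMProcessor` normally builds these tensors from raw
# HTML; the ids below are placeholders, and only the shapes and the use of the config pad ids matter:
#
#     from transformers import MarkupLMConfig, MarkupLMModel
#     import torch
#
#     config = MarkupLMConfig()
#     model = MarkupLMModel(config)
#     input_ids = torch.tensor([[0, 9, 10, 2]])  # (batch_size, seq_len), placeholder token ids
#     # one tag id and one subscript id per token and per depth level, padded up to config.max_depth
#     xpath_tags_seq = torch.full((1, 4, config.max_depth), config.tag_pad_id)
#     xpath_subs_seq = torch.full((1, 4, config.max_depth), config.subs_pad_id)
#     outputs = model(input_ids=input_ids, xpath_tags_seq=xpath_tags_seq, xpath_subs_seq=xpath_subs_seq)
#     outputs.last_hidden_state.shape  # (1, 4, config.hidden_size)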


@add_start_docstrings(
    "The bare MarkupLM Model transformer outputting raw hidden-states without any specific head on top.",
    MARKUPLM_START_DOCSTRING,
)
class MarkupLMModel(MarkupLMPreTrainedModel):
    def __init__(self, config, add_pooling_layer=True):
        super().__init__(config)
        self.config = config

        self.embeddings = MarkupLMEmbeddings(config)
        self.encoder = MarkupLMEncoder(config)
        self.pooler = MarkupLMPooler(config) if add_pooling_layer else None

        # Initialize weights and apply final processing
        self.post_init()

    def get_input_embeddings(self):
        return self.embeddings.word_embeddings

    def set_input_embeddings(self, value):
        self.embeddings.word_embeddings = value

    def _prune_heads(self, heads_to_prune):
        """
        Prunes heads of the model. heads_to_prune: dict of {layer_num: list of heads to prune in this layer} See base
        class PreTrainedModel
        """
        for layer, heads in heads_to_prune.items():
            self.encoder.layer[layer].attention.prune_heads(heads)

    @add_start_docstrings_to_model_forward(MARKUPLM_INPUTS_DOCSTRING.format("batch_size, sequence_length"))
    @replace_return_docstrings(output_type=BaseModelOutputWithPoolingAndCrossAttentions, config_class=_CONFIG_FOR_DOC)
    def forward(
        self,
        input_ids: Optional[torch.LongTensor] = None,
        xpath_tags_seq: Optional[torch.LongTensor] = None,
        xpath_subs_seq: Optional[torch.LongTensor] = None,
        attention_mask: Optional[torch.FloatTensor] = None,
        token_type_ids: Optional[torch.LongTensor] = None,
        position_ids: Optional[torch.LongTensor] = None,
        head_mask: Optional[torch.FloatTensor] = None,
        inputs_embeds: Optional[torch.FloatTensor] = None,
        output_attentions: Optional[bool] = None,
        output_hidden_states: Optional[bool] = None,
        return_dict: Optional[bool] = None,
    ) -> Union[Tuple, BaseModelOutputWithPoolingAndCrossAttentions]:
        r"""
        Returns:

        Examples:

        ```python
        >>> from transformers import AutoProcessor, MarkupLMModel

        >>> processor = AutoProcessor.from_pretrained("microsoft/markuplm-base")
        >>> model = MarkupLMModel.from_pretrained("microsoft/markuplm-base")

        >>> html_string = "<html> <head> <title>Page Title</title> </head> </html>"

        >>> encoding = processor(html_string, return_tensors="pt")

        >>> outputs = model(**encoding)
        >>> last_hidden_states = outputs.last_hidden_state
        >>> list(last_hidden_states.shape)
        [1, 4, 768]
        ```"""
        output_attentions = output_attentions if output_attentions is not None else self.config.output_attentions
        output_hidden_states = (
            output_hidden_states if output_hidden_states is not None else self.config.output_hidden_states
        )
        return_dict = return_dict if return_dict is not None else self.config.use_return_dict

        if input_ids is not None and inputs_embeds is not None:
            raise ValueError("You cannot specify both input_ids and inputs_embeds at the same time")
        elif input_ids is not None:
            self.warn_if_padding_and_no_attention_mask(input_ids, attention_mask)
            input_shape = input_ids.size()
        elif inputs_embeds is not None:
            input_shape = inputs_embeds.size()[:-1]
        else:
            raise ValueError("You have to specify either input_ids or inputs_embeds")

        device = input_ids.device if input_ids is not None else inputs_embeds.device

        if attention_mask is None:
            attention_mask = torch.ones(input_shape, device=device)

        if token_type_ids is None:
            token_type_ids = torch.zeros(input_shape, dtype=torch.long, device=device)

        extended_attention_mask = attention_mask.unsqueeze(1).unsqueeze(2)
        extended_attention_mask = extended_attention_mask.to(dtype=self.dtype)
        extended_attention_mask = (1.0 - extended_attention_mask) * -10000.0

        if head_mask is not None:
            if head_mask.dim() == 1:
                head_mask = head_mask.unsqueeze(0).unsqueeze(0).unsqueeze(-1).unsqueeze(-1)
                head_mask = head_mask.expand(self.config.num_hidden_layers, -1, -1, -1, -1)
            elif head_mask.dim() == 2:
                head_mask = head_mask.unsqueeze(1).unsqueeze(-1).unsqueeze(-1)
            head_mask = head_mask.to(dtype=next(self.parameters()).dtype)
        else:
            head_mask = [None] * self.config.num_hidden_layers

        embedding_output = self.embeddings(
            input_ids=input_ids,
            xpath_tags_seq=xpath_tags_seq,
            xpath_subs_seq=xpath_subs_seq,
            position_ids=position_ids,
            token_type_ids=token_type_ids,
            inputs_embeds=inputs_embeds,
        )
        encoder_outputs = self.encoder(
            embedding_output,
            extended_attention_mask,
            head_mask=head_mask,
            output_attentions=output_attentions,
            output_hidden_states=output_hidden_states,
            return_dict=return_dict,
        )
        sequence_output = encoder_outputs[0]

        pooled_output = self.pooler(sequence_output) if self.pooler is not None else None

        if not return_dict:
            return (sequence_output, pooled_output) + encoder_outputs[1:]

        return BaseModelOutputWithPoolingAndCrossAttentions(
            last_hidden_state=sequence_output,
            pooler_output=pooled_output,
            hidden_states=encoder_outputs.hidden_states,
            attentions=encoder_outputs.attentions,
            cross_attentions=encoder_outputs.cross_attentions,
        )

    def prepare_inputs_for_generation(
        self, input_ids, past_key_values=None, attention_mask=None, use_cache=True, **model_kwargs
    ):
        input_shape = input_ids.shape
        # if the model is used as a decoder in an encoder-decoder model, the attention mask is created on the fly
        if attention_mask is None:
            attention_mask = input_ids.new_ones(input_shape)

        # cut decoder_input_ids if past_key_values is used
        if past_key_values is not None:
            past_length = past_key_values[0][0].shape[2]

            # Some generation methods already pass only the last input ID
            if input_ids.shape[1] > past_length:
                remove_prefix_length = past_length
            else:
                # Default to old behavior: keep only the final ID
                remove_prefix_length = input_ids.shape[1] - 1

            input_ids = input_ids[:, remove_prefix_length:]

        return {
            "input_ids": input_ids,
            "attention_mask": attention_mask,
            "past_key_values": past_key_values,
            "use_cache": use_cache,
        }

    def _reorder_cache(self, past_key_values, beam_idx):
        reordered_past = ()
        for layer_past in past_key_values:
            reordered_past += (
                tuple(past_state.index_select(0, beam_idx.to(past_state.device)) for past_state in layer_past),
            )
        return reordered_past


@add_start_docstrings(
    """
    MarkupLM Model with a span classification head on top for extractive question-answering tasks like SQuAD (a linear
    layers on top of the hidden-states output to compute `span start logits` and `span end logits`).
    """,
    MARKUPLM_START_DOCSTRING,
)
class MarkupLMForQuestionAnswering(MarkupLMPreTrainedModel):
    def __init__(self, config):
        super().__init__(config)
        self.num_labels = config.num_labels

        self.markuplm = MarkupLMModel(config, add_pooling_layer=False)
        self.qa_outputs = nn.Linear(config.hidden_size, config.num_labels)

        # Initialize weights and apply final processing
        self.post_init()

    @add_start_docstrings_to_model_forward(MARKUPLM_INPUTS_DOCSTRING.format("batch_size, sequence_length"))
    @replace_return_docstrings(output_type=QuestionAnsweringModelOutput, config_class=_CONFIG_FOR_DOC)
    def forward(
        self,
        input_ids: Optional[torch.Tensor] = None,
        xpath_tags_seq: Optional[torch.Tensor] = None,
        xpath_subs_seq: Optional[torch.Tensor] = None,
        attention_mask: Optional[torch.Tensor] = None,
        token_type_ids: Optional[torch.Tensor] = None,
        position_ids: Optional[torch.Tensor] = None,
        head_mask: Optional[torch.Tensor] = None,
        inputs_embeds: Optional[torch.Tensor] = None,
        start_positions: Optional[torch.Tensor] = None,
        end_positions: Optional[torch.Tensor] = None,
        output_attentions: Optional[bool] = None,
        output_hidden_states: Optional[bool] = None,
        return_dict: Optional[bool] = None,
    ) -> Union[Tuple[torch.Tensor], QuestionAnsweringModelOutput]:
        r"""
        start_positions (`torch.LongTensor` of shape `(batch_size,)`, *optional*):
            Labels for position (index) of the start of the labelled span for computing the token classification loss.
            Positions are clamped to the length of the sequence (`sequence_length`). Position outside of the sequence
            are not taken into account for computing the loss.
        end_positions (`torch.LongTensor` of shape `(batch_size,)`, *optional*):
            Labels for position (index) of the end of the labelled span for computing the token classification loss.
            Positions are clamped to the length of the sequence (`sequence_length`). Position outside of the sequence
            are not taken into account for computing the loss.

        Returns:

        Examples:

        ```python
        >>> from transformers import AutoProcessor, MarkupLMForQuestionAnswering
        >>> import torch

        >>> processor = AutoProcessor.from_pretrained("microsoft/markuplm-base-finetuned-websrc")
        >>> model = MarkupLMForQuestionAnswering.from_pretrained("microsoft/markuplm-base-finetuned-websrc")

        >>> html_string = "<html> <head> <title>My name is Niels</title> </head> </html>"
        >>> question = "What's his name?"

        >>> encoding = processor(html_string, questions=question, return_tensors="pt")

        >>> with torch.no_grad():
        ...     outputs = model(**encoding)

        >>> answer_start_index = outputs.start_logits.argmax()
        >>> answer_end_index = outputs.end_logits.argmax()

        >>> predict_answer_tokens = encoding.input_ids[0, answer_start_index : answer_end_index + 1]
        >>> processor.decode(predict_answer_tokens).strip()
        'Niels'
        ```"""
        return_dict = return_dict if return_dict is not None else self.config.use_return_dict

        outputs = self.markuplm(
            input_ids,
            xpath_tags_seq=xpath_tags_seq,
            xpath_subs_seq=xpath_subs_seq,
            attention_mask=attention_mask,
            token_type_ids=token_type_ids,
            position_ids=position_ids,
            head_mask=head_mask,
            inputs_embeds=inputs_embeds,
            output_attentions=output_attentions,
            output_hidden_states=output_hidden_states,
            return_dict=return_dict,
        )

        sequence_output = outputs[0]

        logits = self.qa_outputs(sequence_output)
        start_logits, end_logits = logits.split(1, dim=-1)
        start_logits = start_logits.squeeze(-1)
        end_logits = end_logits.squeeze(-1)

        total_loss = None
        if start_positions is not None and end_positions is not None:
            # If we are on multi-GPU, split adds a dimension
            if len(start_positions.size()) > 1:
                start_positions = start_positions.squeeze(-1)
            if len(end_positions.size()) > 1:
                end_positions = end_positions.squeeze(-1)
            # sometimes the start/end positions are outside our model inputs, we ignore these terms
            ignored_index = start_logits.size(1)
            start_positions.clamp_(0, ignored_index)
            end_positions.clamp_(0, ignored_index)

            loss_fct = CrossEntropyLoss(ignore_index=ignored_index)
            start_loss = loss_fct(start_logits, start_positions)
            end_loss = loss_fct(end_logits, end_positions)
            total_loss = (start_loss + end_loss) / 2

        if not return_dict:
            output = (start_logits, end_logits) + outputs[2:]
            return ((total_loss,) + output) if total_loss is not None else output

        return QuestionAnsweringModelOutput(
            loss=total_loss,
            start_logits=start_logits,
            end_logits=end_logits,
            hidden_states=outputs.hidden_states,
            attentions=outputs.attentions,
        )


@add_start_docstrings("""MarkupLM Model with a `token_classification` head on top.""", MARKUPLM_START_DOCSTRING)
class MarkupLMForTokenClassification(MarkupLMPreTrainedModel):
    def __init__(self, config):
        super().__init__(config)
        self.num_labels = config.num_labels
        self.markuplm = MarkupLMModel(config, add_pooling_layer=False)
        classifier_dropout = (
            config.classifier_dropout if config.classifier_dropout is not None else config.hidden_dropout_prob
        )
        self.dropout = nn.Dropout(classifier_dropout)
        self.classifier = nn.Linear(config.hidden_size, config.num_labels)

        # Initialize weights and apply final processing
        self.post_init()

    @add_start_docstrings_to_model_forward(MARKUPLM_INPUTS_DOCSTRING.format("batch_size, sequence_length"))
    @replace_return_docstrings(output_type=TokenClassifierOutput, config_class=_CONFIG_FOR_DOC)
    def forward(
        self,
        input_ids: Optional[torch.Tensor] = None,
        xpath_tags_seq: Optional[torch.Tensor] = None,
        xpath_subs_seq: Optional[torch.Tensor] = None,
        attention_mask: Optional[torch.Tensor] = None,
        token_type_ids: Optional[torch.Tensor] = None,
        position_ids: Optional[torch.Tensor] = None,
        head_mask: Optional[torch.Tensor] = None,
        inputs_embeds: Optional[torch.Tensor] = None,
        labels: Optional[torch.Tensor] = None,
        output_attentions: Optional[bool] = None,
        output_hidden_states: Optional[bool] = None,
        return_dict: Optional[bool] = None,
    ) -> Union[Tuple[torch.Tensor], TokenClassifierOutput]:
        r"""
        labels (`torch.LongTensor` of shape `(batch_size, sequence_length)`, *optional*):
            Labels for computing the token classification loss. Indices should be in `[0, ..., config.num_labels - 1]`.

        Returns:

        Examples:

        ```python
        >>> from transformers import AutoProcessor, AutoModelForTokenClassification
        >>> import torch

        >>> processor = AutoProcessor.from_pretrained("microsoft/markuplm-base")
        >>> processor.parse_html = False
        >>> model = AutoModelForTokenClassification.from_pretrained("microsoft/markuplm-base", num_labels=7)

        >>> nodes = ["hello", "world"]
        >>> xpaths = ["/html/body/div/li[1]/div/span", "/html/body/div/li[1]/div/span"]
        >>> node_labels = [1, 2]
        >>> encoding = processor(nodes=nodes, xpaths=xpaths, node_labels=node_labels, return_tensors="pt")

        >>> with torch.no_grad():
        ...     outputs = model(**encoding)

        >>> loss = outputs.loss
        >>> logits = outputs.logits
        ```"""
        return_dict = return_dict if return_dict is not None else self.config.use_return_dict

        outputs = self.markuplm(
            input_ids,
            xpath_tags_seq=xpath_tags_seq,
            xpath_subs_seq=xpath_subs_seq,
            attention_mask=attention_mask,
            token_type_ids=token_type_ids,
            position_ids=position_ids,
            head_mask=head_mask,
            inputs_embeds=inputs_embeds,
            output_attentions=output_attentions,
            output_hidden_states=output_hidden_states,
            return_dict=return_dict,
        )

        sequence_output = outputs[0]
        prediction_scores = self.classifier(sequence_output)  # (batch_size, seq_length, node_type_size)

        loss = None
        if labels is not None:
            loss_fct = CrossEntropyLoss()
            loss = loss_fct(
                prediction_scores.view(-1, self.config.num_labels),
                labels.view(-1),
            )

        if not return_dict:
            output = (prediction_scores,) + outputs[2:]
            return ((loss,) + output) if loss is not None else output

        return TokenClassifierOutput(
            loss=loss,
            logits=prediction_scores,
            hidden_states=outputs.hidden_states,
            attentions=outputs.attentions,
        )


@add_start_docstrings(
    """
    MarkupLM Model transformer with a sequence classification/regression head on top (a linear layer on top of the
    pooled output) e.g. for GLUE tasks.
    """,
    MARKUPLM_START_DOCSTRING,
)
class MarkupLMForSequenceClassification(MarkupLMPreTrainedModel):
    def __init__(self, config):
        super().__init__(config)
        self.num_labels = config.num_labels
        self.config = config
        self.markuplm = MarkupLMModel(config)
        classifier_dropout = (
            config.classifier_dropout if config.classifier_dropout is not None else config.hidden_dropout_prob
        )
        self.dropout = nn.Dropout(classifier_dropout)
        self.classifier = nn.Linear(config.hidden_size, config.num_labels)

        # Initialize weights and apply final processing
        self.post_init()

    @add_start_docstrings_to_model_forward(MARKUPLM_INPUTS_DOCSTRING.format("batch_size, sequence_length"))
    @replace_return_docstrings(output_type=SequenceClassifierOutput, config_class=_CONFIG_FOR_DOC)
    def forward(
        self,
        input_ids: Optional[torch.Tensor] = None,
        xpath_tags_seq: Optional[torch.Tensor] = None,
        xpath_subs_seq: Optional[torch.Tensor] = None,
        attention_mask: Optional[torch.Tensor] = None,
        token_type_ids: Optional[torch.Tensor] = None,
        position_ids: Optional[torch.Tensor] = None,
        head_mask: Optional[torch.Tensor] = None,
        inputs_embeds: Optional[torch.Tensor] = None,
        labels: Optional[torch.Tensor] = None,
        output_attentions: Optional[bool] = None,
        output_hidden_states: Optional[bool] = None,
        return_dict: Optional[bool] = None,
    ) -> Union[Tuple[torch.Tensor], SequenceClassifierOutput]:
        r"""
        labels (`torch.LongTensor` of shape `(batch_size,)`, *optional*):
            Labels for computing the sequence classification/regression loss. Indices should be in `[0, ...,
            config.num_labels - 1]`. If `config.num_labels == 1` a regression loss is computed (Mean-Square loss), If
            `config.num_labels > 1` a classification loss is computed (Cross-Entropy).

        Returns:

        Examples:

        ```python
        >>> from transformers import AutoProcessor, AutoModelForSequenceClassification
        >>> import torch

        >>> processor = AutoProcessor.from_pretrained("microsoft/markuplm-base")
        >>> model = AutoModelForSequenceClassification.from_pretrained("microsoft/markuplm-base", num_labels=7)

        >>> html_string = "<html> <head> <title>Page Title</title> </head> </html>"
        >>> encoding = processor(html_string, return_tensors="pt")

        >>> with torch.no_grad():
        ...     outputs = model(**encoding)

        >>> loss = outputs.loss
        >>> logits = outputs.logits
        ```"""
        return_dict = return_dict if return_dict is not None else self.config.use_return_dict

        outputs = self.markuplm(
            input_ids,
            xpath_tags_seq=xpath_tags_seq,
            xpath_subs_seq=xpath_subs_seq,
            attention_mask=attention_mask,
            token_type_ids=token_type_ids,
            position_ids=position_ids,
            head_mask=head_mask,
            inputs_embeds=inputs_embeds,
            output_attentions=output_attentions,
            output_hidden_states=output_hidden_states,
            return_dict=return_dict,
        )

        pooled_output = outputs[1]

        pooled_output = self.dropout(pooled_output)
        logits = self.classifier(pooled_output)

        loss = None
        if labels is not None:
            if self.config.problem_type is None:
                if self.num_labels == 1:
                    self.config.problem_type = "regression"
                elif self.num_labels > 1 and (labels.dtype == torch.long or labels.dtype == torch.int):
                    self.config.problem_type = "single_label_classification"
                else:
                    self.config.problem_type = "multi_label_classification"

            if self.config.problem_type == "regression":
                loss_fct = MSELoss()
                if self.num_labels == 1:
                    loss = loss_fct(logits.squeeze(), labels.squeeze())
                else:
                    loss = loss_fct(logits, labels)
            elif self.config.problem_type == "single_label_classification":
                loss_fct = CrossEntropyLoss()
                loss = loss_fct(logits.view(-1, self.num_labels), labels.view(-1))
            elif self.config.problem_type == "multi_label_classification":
                loss_fct = BCEWithLogitsLoss()
                loss = loss_fct(logits, labels)

        if not return_dict:
            output = (logits,) + outputs[2:]
            return ((loss,) + output) if loss is not None else output

        return SequenceClassifierOutput(
            loss=loss,
            logits=logits,
            hidden_states=outputs.hidden_states,
            attentions=outputs.attentions,
        )