""" UMT5 model configuration"""
from typing import Mapping

from ...configuration_utils import PretrainedConfig
from ...onnx import OnnxSeq2SeqConfigWithPast
from ...utils import logging


logger = logging.get_logger(__name__)

UMT5_PRETRAINED_CONFIG_ARCHIVE_MAP = {
    "google/umt5-small": "https://huggingface.co/google/umt5-small/resolve/main/config.json",
}


class UMT5Config(PretrainedConfig):
    r"""
    This is the configuration class to store the configuration of a [`UMT5Model`]. It is used to instantiate a UMT5
    model according to the specified arguments, defining the model architecture. Instantiating a configuration with the
    defaults will yield a similar configuration to that of the UMT5
    [google/umt5-small](https://huggingface.co/google/umt5-small) architecture.

    Configuration objects inherit from [`PretrainedConfig`] and can be used to control the model outputs. Read the
    documentation from [`PretrainedConfig`] for more information.

    Arguments:
        vocab_size (`int`, *optional*, defaults to 250112):
            Vocabulary size of the UMT5 model. Defines the number of different tokens that can be represented by the
            `input_ids` passed when calling [`UMT5Model`] or [`TFUMT5Model`].
        d_model (`int`, *optional*, defaults to 512):
            Size of the encoder layers and the pooler layer.
        d_kv (`int`, *optional*, defaults to 64):
            Size of the key, query, value projections per attention head. `d_kv` has to be equal to `d_model //
            num_heads`.
        d_ff (`int`, *optional*, defaults to 1024):
            Size of the intermediate feed forward layer in each `UMT5Block`.
        num_layers (`int`, *optional*, defaults to 8):
            Number of hidden layers in the Transformer encoder.
        num_decoder_layers (`int`, *optional*):
            Number of hidden layers in the Transformer decoder. Will use the same value as `num_layers` if not set.
        num_heads (`int`, *optional*, defaults to 6):
            Number of attention heads for each attention layer in the Transformer encoder.
        relative_attention_num_buckets (`int`, *optional*, defaults to 32):
            The number of buckets to use for each attention layer.
        relative_attention_max_distance (`int`, *optional*, defaults to 128):
            The maximum relative distance used for the bucket separation; relative positions beyond this
            distance all map into the same bucket.
        dropout_rate (`float`, *optional*, defaults to 0.1):
            The ratio for all dropout layers.
        classifier_dropout (`float`, *optional*, defaults to 0.0):
            The dropout ratio for the classifier.
        layer_norm_epsilon (`float`, *optional*, defaults to 1e-6):
            The epsilon used by the layer normalization layers.
        initializer_factor (`float`, *optional*, defaults to 1.0):
            A factor for initializing all weight matrices (should be kept to 1, used internally for initialization
            testing).
        feed_forward_proj (`str`, *optional*, defaults to `"gated-gelu"`):
            Type of feed forward layer to be used. Should be one of `"relu"` or `"gated-gelu"`.
        use_cache (`bool`, *optional*, defaults to `True`):
            Whether or not the model should return the last key/values attentions (not used by all models).
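
    Example (a minimal usage sketch; assumes `UMT5Config` and `UMT5Model` are exported by the installed
    `transformers` release):

    ```python
    >>> from transformers import UMT5Config, UMT5Model

    >>> # Initializing a UMT5 configuration with google/umt5-small style defaults
    >>> configuration = UMT5Config()

    >>> # Initializing a model (with random weights) from the configuration
    >>> model = UMT5Model(configuration)

    >>> # Accessing the model configuration
    >>> configuration = model.config
    ```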
    """

    model_type = "umt5"
    keys_to_ignore_at_inference = ["past_key_values"]

    def __init__(
        self,
        vocab_size=250112,
        d_model=512,
        d_kv=64,
        d_ff=1024,
        num_layers=8,
        num_decoder_layers=None,
        num_heads=6,
        relative_attention_num_buckets=32,
        relative_attention_max_distance=128,
        dropout_rate=0.1,
        layer_norm_epsilon=1e-6,
        initializer_factor=1.0,
        feed_forward_proj="gated-gelu",
        is_encoder_decoder=True,
        use_cache=True,
        tokenizer_class="T5Tokenizer",
        tie_word_embeddings=True,
        pad_token_id=0,
        eos_token_id=1,
        decoder_start_token_id=0,
        classifier_dropout=0.0,
        **kwargs,
    ):
        super().__init__(
            is_encoder_decoder=is_encoder_decoder,
            tokenizer_class=tokenizer_class,
            tie_word_embeddings=tie_word_embeddings,
            pad_token_id=pad_token_id,
            eos_token_id=eos_token_id,
            decoder_start_token_id=decoder_start_token_id,
            **kwargs,
        )
        self.vocab_size = vocab_size
        self.d_model = d_model
        self.d_kv = d_kv
        self.d_ff = d_ff
        self.num_layers = num_layers
        # default: decoder depth mirrors the encoder depth
        self.num_decoder_layers = num_decoder_layers if num_decoder_layers is not None else self.num_layers
        self.num_heads = num_heads
        self.relative_attention_num_buckets = relative_attention_num_buckets
        self.relative_attention_max_distance = relative_attention_max_distance
        self.dropout_rate = dropout_rate
        self.classifier_dropout = classifier_dropout
        self.layer_norm_epsilon = layer_norm_epsilon
        self.initializer_factor = initializer_factor
        self.feed_forward_proj = feed_forward_proj
        self.use_cache = use_cache

        # `feed_forward_proj` is either "{ACT_FN}" or "gated-{ACT_FN}"
        act_info = self.feed_forward_proj.split("-")
        self.dense_act_fn = act_info[-1]
        self.is_gated_act = act_info[0] == "gated"

        if len(act_info) > 1 and act_info[0] != "gated" or len(act_info) > 2:
            raise ValueError(
                f"`feed_forward_proj`: {feed_forward_proj} is not a valid activation function of the dense layer. "
                "Please make sure `feed_forward_proj` is of the format `gated-{ACT_FN}` or `{ACT_FN}`, e.g. "
                "'gated-gelu' or 'relu'"
            )

        # for backwards compatibility
        if feed_forward_proj == "gated-gelu":
            self.dense_act_fn = "gelu_new"

    @property
    def hidden_size(self):
        return self.d_model

    @property
    def num_attention_heads(self):
        return self.num_heads

    @property
    def num_hidden_layers(self):
        return self.num_layers


class UMT5OnnxConfig(OnnxSeq2SeqConfigWithPast):
    @property
    def inputs(self) -> Mapping[str, Mapping[int, str]]:
        common_inputs = {
            "input_ids": {0: "batch", 1: "encoder_sequence"},
            "attention_mask": {0: "batch", 1: "encoder_sequence"},
        }
        if self.use_past:
            # with a cache, the decoder consumes one new token at a time and the
            # attention masks also cover the cached past keys/values
            common_inputs["attention_mask"][1] = "past_encoder_sequence + sequence"
            common_inputs["decoder_input_ids"] = {0: "batch"}
            common_inputs["decoder_attention_mask"] = {0: "batch", 1: "past_decoder_sequence + sequence"}
        else:
            common_inputs["decoder_input_ids"] = {0: "batch", 1: "decoder_sequence"}
            common_inputs["decoder_attention_mask"] = {0: "batch", 1: "decoder_sequence"}

        if self.use_past:
            self.fill_with_past_key_values_(common_inputs, direction="inputs")

        return common_inputs

    @property
    def default_onnx_opset(self) -> int:
        return 13

    @property
    def atol_for_validation(self) -> float:
        return 5e-4
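
# ---------------------------------------------------------------------------
# Illustrative usage sketch (comments only, nothing executes on import). It
# assumes the standard `OnnxConfig` constructor signature, where
# `task="default"` selects the plain seq2seq export:
#
#     config = UMT5Config()                        # google/umt5-small style defaults
#     onnx_config = UMT5OnnxConfig(config, task="default")
#     onnx_config.inputs                           # dynamic-axes mapping for export
#     onnx_config.default_onnx_opset               # 13
# ---------------------------------------------------------------------------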
 