o
    óÜÓhC   ã                   @   s<   d Z ddlmZ ddlmZ dZeeƒG dd„ deƒƒZdS )z RAG model configurationé   )ÚPretrainedConfig)Úadd_start_docstringsa)  
    [`RagConfig`] stores the configuration of a *RagModel*. Configuration objects inherit from [`PretrainedConfig`] and
    can be used to control the model outputs. Read the documentation from [`PretrainedConfig`] for more information.

    Args:
        title_sep (`str`, *optional*, defaults to  `" / "`):
            Separator inserted between the title and the text of the retrieved document when calling [`RagRetriever`].
        doc_sep (`str`, *optional*, defaults to  `" // "`):
            Separator inserted between the text of the retrieved document and the original input when calling
            [`RagRetriever`].
        n_docs (`int`, *optional*, defaults to 5):
            Number of documents to retrieve.
        max_combined_length (`int`, *optional*, defaults to 300):
            Max length of contextualized input returned by [`~RagRetriever.__call__`].
        retrieval_vector_size (`int`, *optional*, defaults to 768):
            Dimensionality of the document embeddings indexed by [`RagRetriever`].
        retrieval_batch_size (`int`, *optional*, defaults to 8):
            Retrieval batch size, defined as the number of queries issues concurrently to the faiss index encapsulated
            [`RagRetriever`].
        dataset (`str`, *optional*, defaults to `"wiki_dpr"`):
            A dataset identifier of the indexed dataset in HuggingFace Datasets (list all available datasets and ids
            using `datasets.list_datasets()`).
        dataset_split (`str`, *optional*, defaults to `"train"`)
            Which split of the `dataset` to load.
        index_name (`str`, *optional*, defaults to `"compressed"`)
            The index name of the index associated with the `dataset`. One can choose between `"legacy"`, `"exact"` and
            `"compressed"`.
        index_path (`str`, *optional*)
            The path to the serialized faiss index on disk.
        passages_path (`str`, *optional*):
            A path to text passages compatible with the faiss index. Required if using
            [`~models.rag.retrieval_rag.LegacyIndex`]
        use_dummy_dataset (`bool`, *optional*, defaults to `False`)
            Whether to load a "dummy" variant of the dataset specified by `dataset`.
        label_smoothing (`float`, *optional*, defaults to 0.0):
            Only relevant if `return_loss` is set to `True`. Controls the `epsilon` parameter value for label smoothing
            in the loss calculation. If set to 0, no label smoothing is performed.
        do_marginalize (`bool`, *optional*, defaults to `False`):
            If `True`, the logits are marginalized over all documents by making use of
            `torch.nn.functional.log_softmax`.
        reduce_loss (`bool`, *optional*, defaults to `False`):
            Whether or not to reduce the NLL loss using the `torch.Tensor.sum` operation.
        do_deduplication (`bool`, *optional*, defaults to `True`):
            Whether or not to deduplicate the generations from different context documents for a given input. Has to be
            set to `False` if used while training with distributed backend.
        exclude_bos_score (`bool`, *optional*, defaults to `False`):
            Whether or not to disregard the BOS token when computing the loss.
        output_retrieved(`bool`, *optional*, defaults to `False`):
            If set to `True`, `retrieved_doc_embeds`, `retrieved_doc_ids`, `context_input_ids` and
            `context_attention_mask` are returned. See returned tensors for more detail.
        use_cache (`bool`, *optional*, defaults to `True`):
            Whether or not the model should return the last key/values attentions (not used by all models).
        forced_eos_token_id (`int`, *optional*):
            The id of the token to force as the last generated token when `max_length` is reached. Usually set to
            `eos_token_id`.
c                       sv   e Zd ZdZdZ															
													d‡ fdd„	Zedededefdd„ƒZ‡  Z	S )Ú	RagConfigÚragTNú / ú // é   é,  é   é   Úwiki_dprÚtrainÚ
compressedFç        c           "         s,  t ƒ jd	||||||||dœ|¤Ž d|v rd|v sJ dƒ‚| d¡}| d¡}| d¡}| d¡} ddlm}! |!j|fi |¤Ž| _|!j| fi |¤Ž| _|| _|| _	|| _
|| _|| _|	| _|
| _|| _|| _|| _|| _|| _|| _|| _|| _|| _|| _|| _|| _| jd u r”t| jdd ƒ| _d S d S )
N)Úbos_token_idÚpad_token_idÚeos_token_idÚdecoder_start_token_idÚforced_eos_token_idÚis_encoder_decoderÚprefixÚ
vocab_sizeÚquestion_encoderÚ	generatorzGConfig has to be initialized with question_encoder and generator configÚ
model_typeé   )Ú
AutoConfigr   © )ÚsuperÚ__init__ÚpopÚauto.configuration_autor   Ú	for_modelr   r   Úreduce_lossÚlabel_smoothingÚexclude_bos_scoreÚdo_marginalizeÚ	title_sepÚdoc_sepÚn_docsÚmax_combined_lengthÚdatasetÚdataset_splitÚ
index_nameÚretrieval_vector_sizeÚretrieval_batch_sizeÚpassages_pathÚ
index_pathÚuse_dummy_datasetÚoutput_retrievedÚdo_deduplicationÚ	use_cacher   Úgetattr)"Úselfr   r   r   r   r   r   r   r'   r(   r)   r*   r.   r/   r+   r,   r-   r1   r0   r2   r#   r$   r4   r%   r&   r3   r5   r   ÚkwargsÚquestion_encoder_configÚquestion_encoder_model_typeÚdecoder_configÚdecoder_model_typer   ©Ú	__class__r   ú_/var/www/html/ai/venv/lib/python3.10/site-packages/transformers/models/rag/configuration_rag.pyr   U   sX   ø	÷ÿ




ÿzRagConfig.__init__r9   Úgenerator_configÚreturnc                 K   s   | d|  ¡ |  ¡ dœ|¤ŽS )a  
        Instantiate a [`EncoderDecoderConfig`] (or a derived class) from a pre-trained encoder model configuration and
        decoder model configuration.

        Returns:
            [`EncoderDecoderConfig`]: An instance of a configuration object
        )r   r   Nr   )Úto_dict)Úclsr9   r@   r8   r   r   r?   Ú'from_question_encoder_generator_configs©   s   z1RagConfig.from_question_encoder_generator_configs)NTNNNNNr   r   r   r	   r
   r   r   r   r   NNFFr   TFFFTN)
Ú__name__Ú
__module__Ú__qualname__r   Úis_compositionr   Úclassmethodr   rD   Ú__classcell__r   r   r=   r?   r   P   sN    äTÿÿþr   N)Ú__doc__Úconfiguration_utilsr   Úutilsr   ÚRAG_CONFIG_DOCr   r   r   r   r?   Ú<module>   s   :