"""Tokenization classes for RAG."""

import os
import warnings
from typing import List, Optional

from ...tokenization_utils_base import BatchEncoding
from ...utils import logging
from .configuration_rag import RagConfig


logger = logging.get_logger(__name__)


class RagTokenizer:
    def __init__(self, question_encoder, generator):
        self.question_encoder = question_encoder
        self.generator = generator
        self.current_tokenizer = self.question_encoder

    def save_pretrained(self, save_directory):
        if os.path.isfile(save_directory):
            raise ValueError(f"Provided path ({save_directory}) should be a directory, not a file")
        os.makedirs(save_directory, exist_ok=True)
        question_encoder_path = os.path.join(save_directory, "question_encoder_tokenizer")
        generator_path = os.path.join(save_directory, "generator_tokenizer")
        self.question_encoder.save_pretrained(question_encoder_path)
        self.generator.save_pretrained(generator_path)

    @classmethod
    def from_pretrained(cls, pretrained_model_name_or_path, **kwargs):
        # dynamically import AutoTokenizer
        from ..auto.tokenization_auto import AutoTokenizer

        config = kwargs.pop("config", None)
        if config is None:
            config = RagConfig.from_pretrained(pretrained_model_name_or_path)

        question_encoder = AutoTokenizer.from_pretrained(
            pretrained_model_name_or_path, config=config.question_encoder, subfolder="question_encoder_tokenizer"
        )
        generator = AutoTokenizer.from_pretrained(
            pretrained_model_name_or_path, config=config.generator, subfolder="generator_tokenizer"
        )
        return cls(question_encoder=question_encoder, generator=generator)

    def __call__(self, *args, **kwargs):
        return self.current_tokenizer(*args, **kwargs)

    def batch_decode(self, *args, **kwargs):
        return self.generator.batch_decode(*args, **kwargs)

    def decode(self, *args, **kwargs):
        return self.generator.decode(*args, **kwargs)

    def _switch_to_input_mode(self):
        self.current_tokenizer = self.question_encoder

    def _switch_to_target_mode(self):
        self.current_tokenizer = self.generator

    def prepare_seq2seq_batch(
        self,
        src_texts: List[str],
        tgt_texts: Optional[List[str]] = None,
        max_length: Optional[int] = None,
        max_target_length: Optional[int] = None,
        padding: str = "longest",
        return_tensors: str = None,
        truncation: bool = True,
        **kwargs,
    ) -> BatchEncoding:
        warnings.warn(
            "`prepare_seq2seq_batch` is deprecated and will be removed in version 5 of 🤗 Transformers. Use the "
            "regular `__call__` method to prepare your inputs and the tokenizer under the `with_target_tokenizer` "
            "context manager to prepare your targets. See the documentation of your specific tokenizer for more "
            "details",
            FutureWarning,
        )
        if max_length is None:
            max_length = self.current_tokenizer.model_max_length
        model_inputs = self(
            src_texts,
            add_special_tokens=True,
            return_tensors=return_tensors,
            max_length=max_length,
            padding=padding,
            truncation=truncation,
            **kwargs,
        )
        if tgt_texts is None:
            return model_inputs
        # Process tgt_texts
        if max_target_length is None:
            max_target_length = self.current_tokenizer.model_max_length
        labels = self(
            text_target=tgt_texts,
            add_special_tokens=True,
            return_tensors=return_tensors,
            padding=padding,
            max_length=max_target_length,
            truncation=truncation,
            **kwargs,
        )
        model_inputs["labels"] = labels["input_ids"]
        return model_inputs
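

# ---------------------------------------------------------------------------
# Usage sketch (illustrative only, not part of the upstream module): shows how
# the combined tokenizer routes questions through the question-encoder
# tokenizer (the default `current_tokenizer`) while `decode`/`batch_decode`
# always go through the generator tokenizer. Assumptions: the Hub checkpoint
# "facebook/rag-token-nq" is reachable and PyTorch is installed.
if __name__ == "__main__":
    from transformers import RagTokenizer

    tokenizer = RagTokenizer.from_pretrained("facebook/rag-token-nq")

    # Question text is encoded with the question-encoder (DPR/BERT) tokenizer.
    inputs = tokenizer("who holds the record in 100m freestyle", return_tensors="pt")
    print(inputs["input_ids"].shape)

    # `batch_decode` delegates to the generator (BART) tokenizer, so it is
    # intended for ids produced by the generator, e.g. `model.generate(...)`.
    # Here we round-trip generator-encoded text to keep the sketch self-contained:
    sample_ids = tokenizer.generator("michael phelps", return_tensors="pt")["input_ids"]
    print(tokenizer.batch_decode(sample_ids, skip_special_tokens=True))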