o
    h                     @   s`   d dl mZ d dlZddlmZmZ ddlmZ ddlm	Z	 er&d dl
mZ G d	d
 d
e	ZdS )    )TYPE_CHECKINGN   )#AutoModelForVisualQuestionAnsweringAutoProcessor)requires_backends   )PipelineTool)Imagec                       s`   e Zd ZdZdZdZeZeZ	ddgZ
dgZ fddZddd	efd
dZdd Zdd Z  ZS )ImageQuestionAnsweringToolzdandelin/vilt-b32-finetuned-vqaa  This is a tool that answers a question about an image. It takes an input named `image` which should be the image containing the information, as well as a `question` which should be the question in English. It returns a text that is the answer to the question.image_qaimagetextc                    s"   t | dg t j|i | d S )Nvision)r   super__init__)selfargskwargs	__class__ a/var/www/html/ai/venv/lib/python3.10/site-packages/transformers/tools/image_question_answering.pyr   ,   s   z#ImageQuestionAnsweringTool.__init__r	   questionc                 C   s   | j ||ddS )Npt)return_tensors)pre_processor)r   r   r   r   r   r   encode0   s   z!ImageQuestionAnsweringTool.encodec                 C   s>   t   | jdi |jW  d    S 1 sw   Y  d S )Nr   )torchno_gradmodellogits)r   inputsr   r   r   forward3   s   
$z"ImageQuestionAnsweringTool.forwardc                 C   s   | d }| jjj| S )N)argmaxitemr   configid2label)r   outputsidxr   r   r   decode7   s   z!ImageQuestionAnsweringTool.decode)__name__
__module____qualname__default_checkpointdescriptionnamer   pre_processor_classr   model_classr!   r(   r   strr   r"   r*   __classcell__r   r   r   r   r
      s    r
   )typingr   r   models.autor   r   utilsr   baser   PILr	   r
   r   r   r   r   <module>   s   