o
    hj                     @   s6  d dl Z d dlmZmZmZmZmZ d dlZddl	m
Z
mZmZmZmZ ddlmZmZmZmZmZ ddlmZmZmZmZmZ e rMd dlZddl	mZ e rTd dlZe r[d dlZe rdd dl mZ! 	dRdej"d	ee
e#f d
eee
e#f  dej"fddZ$dej%dfdej"de&dee
 dej'deee#e
f  dej"fddZ(dd Z)		dSdeej"ddddf dee* deee#e
f  ddfddZ+			dTdej"dee,ee,e,f ee, ee, f d e*d!ee, deee#e
f  de-fd"d#Z.					dUdej"dee,e,f d$d%d&ee, dee
 d'e*deee#e
f  dej"fd(d)Z/		dSdej"d*ee&ee& f d+ee&ee& f dee
 deee#e
f  dej"fd,d-Z0			dVdej"dee,e,f deee#e
f  deee#e
f  d'ee* dej"fd.d/Z1dWd1d2Z2d0ej"dej"fd3d4Z3dXd5d6Z4d0edefd7d8Z5dYd:d;Z6d9ej"dej"fd<d=Z7dZd>d?Z8d9edefd@dAZ9dBdC Z:dDdE Z;G dFdG dGeZ<e<j=dHddfdej"dIee,ee,e,f eee,e,f  f dJe<dKee&ee& f deee#e
f  deee#e
f  dej"fdLdMZ>dedefdNdOZ?		dSdej"dee
 deee#e
f  dej"fdPdQZ@dS )[    N)IterableListOptionalTupleUnion   )ChannelDimension
ImageInputget_channel_dimension_axisget_image_sizeinfer_channel_dimension_format)ExplicitEnum
TensorTypeis_jax_tensoris_tf_tensoris_torch_tensor)is_flax_availableis_tf_availableis_torch_availableis_vision_availablerequires_backends)PILImageResamplingimagechannel_diminput_channel_dimreturnc                 C   s   t | tjstdt|  |du rt| }t|}||kr!| S |tjkr-| d} | S |tj	kr9| d} | S td
|)a)  
    Converts `image` to the channel dimension format specified by `channel_dim`.

    Args:
        image (`numpy.ndarray`):
            The image to have its channel dimension set.
        channel_dim (`ChannelDimension`):
            The channel dimension format to use.
        input_channel_dim (`ChannelDimension`, *optional*):
            The channel dimension format of the input image. If not provided, it will be inferred from the input image.

    Returns:
        `np.ndarray`: The image with the channel dimension set to `channel_dim`.
    ,Input image must be of type np.ndarray, got N)   r   r   )r   r   r   z(Unsupported channel dimension format: {})
isinstancenpndarray
ValueErrortyper   r   FIRST	transposeLASTformat)r   r   r   target_channel_dim r(   S/var/www/html/ai/venv/lib/python3.10/site-packages/transformers/image_transforms.pyto_channel_dimension_format5   s   



r*   scaledata_formatdtypeinput_data_formatc                 C   sH   t | tjstdt|  | | }|durt|||}||}|S )a  
    Rescales `image` by `scale`.

    Args:
        image (`np.ndarray`):
            The image to rescale.
        scale (`float`):
            The scale to use for rescaling the image.
        data_format (`ChannelDimension`, *optional*):
            The channel dimension format of the image. If not provided, it will be the same as the input image.
        dtype (`np.dtype`, *optional*, defaults to `np.float32`):
            The dtype of the output image. Defaults to `np.float32`. Used for backwards compatibility with feature
            extractors.
        input_data_format (`ChannelDimension`, *optional*):
            The channel dimension format of the input image. If not provided, it will be inferred from the input image.

    Returns:
        `np.ndarray`: The rescaled image.
    r   N)r   r   r    r!   r"   r*   astype)r   r+   r,   r-   r.   rescaled_imager(   r(   r)   rescale\   s   
r1   c                 C   s   | j tjkr
d}|S t| | tr4td| kr%t| dkr%d}|S td|   d| 	  dtd| krFt| dkrFd}|S td	|   d| 	  d)
z
    Detects whether or not the image needs to be rescaled before being converted to a PIL image.

    The assumption is that if the image is of type `np.float` and all values are between 0 and 1, it needs to be
    rescaled.
    Fr      zZThe image to be converted to a PIL image contains values outside the range [0, 255], got [z, z%] which cannot be converted to uint8.r   TzXThe image to be converted to a PIL image contains values outside the range [0, 1], got [)
r-   r   uint8allcloser/   intallr!   minmax)r   
do_rescaler(   r(   r)   _rescale_for_pil_conversion   s0   r:   zPIL.Image.Imagetorch.Tensor	tf.Tensorzjnp.ndarrayr9   c                 C   s   t tdg t| tjjr| S t| st| r|  } nt| r&t	
| } nt| t	js5tdt| t| tj|} | jd dkrJt	j| ddn| } |du rTt| n|}|r]t| d} | t	j} tj| S )aI  
    Converts `image` to a PIL Image. Optionally rescales it and puts the channel dimension back as the last axis if
    needed.

    Args:
        image (`PIL.Image.Image` or `numpy.ndarray` or `torch.Tensor` or `tf.Tensor`):
            The image to convert to the `PIL.Image` format.
        do_rescale (`bool`, *optional*):
            Whether or not to apply the scaling factor (to make pixel values integers between 0 and 255). Will default
            to `True` if the image type is a floating type and casting to `int` would result in a loss of precision,
            and `False` otherwise.
        input_data_format (`ChannelDimension`, *optional*):
            The channel dimension format of the input image. If unset, will use the inferred format from the input.

    Returns:
        `PIL.Image.Image`: The converted image.
    visionz"Input image type not supported: {}r   axisNr2   )r   to_pil_imager   PILImager   r   numpyr   r   arrayr    r!   r&   r"   r*   r   r%   shapesqueezer:   r1   r/   r3   	fromarray)r   r9   r.   r(   r(   r)   rA      s    
 
rA   Tinput_imagesizedefault_to_squaremax_sizec                 C   s   t |ttfr t|dkrt|S t|dkr|d }ntd|r&||fS t| |\}}||kr5||fn||f\}}|}	|	t|	| | }
}|duri||	krZtd| d| ||krit||
 | |}
}||krq||
fS |
|fS )a  
    Find the target (height, width) dimension of the output image after resizing given the input image and the desired
    size.

    Args:
        input_image (`np.ndarray`):
            The image to resize.
        size (`int` or `Tuple[int, int]` or List[int] or Tuple[int]):
            The size to use for resizing the image. If `size` is a sequence like (h, w), output size will be matched to
            this.

            If `size` is an int and `default_to_square` is `True`, then image will be resized to (size, size). If
            `size` is an int and `default_to_square` is `False`, then smaller edge of the image will be matched to this
            number. i.e, if height > width, then image will be rescaled to (size * height / width, size).
        default_to_square (`bool`, *optional*, defaults to `True`):
            How to convert `size` when it is a single int. If set to `True`, the `size` will be converted to a square
            (`size`,`size`). If set to `False`, will replicate
            [`torchvision.transforms.Resize`](https://pytorch.org/vision/stable/transforms.html#torchvision.transforms.Resize)
            with support for resizing only the smallest edge and providing an optional `max_size`.
        max_size (`int`, *optional*):
            The maximum allowed for the longer edge of the resized image: if the longer edge of the image is greater
            than `max_size` after being resized according to `size`, then the image is resized again so that the longer
            edge is equal to `max_size`. As a result, `size` might be overruled, i.e the smaller edge may be shorter
            than `size`. Only used if `default_to_square` is `False`.
        input_data_format (`ChannelDimension`, *optional*):
            The channel dimension format of the input image. If unset, will use the inferred format from the input.

    Returns:
        `tuple`: The target (height, width) dimension of the output image after resizing.
    r   r   r   z7size must have 1 or 2 elements if it is a list or tupleNzmax_size = zN must be strictly greater than the requested size for the smaller edge size = )r   tuplelistlenr!   r   r5   )rI   rJ   rK   rL   r.   heightwidthshortlongrequested_new_short	new_shortnew_longr(   r(   r)   get_resize_output_image_size   s,   %
rW   resampler   reducing_gapreturn_numpyc                 C   s   t tdg |dur|ntj}t|dkstd|du r!t| }|du r'|n|}d}t| tj	j	s=t
| }t| ||d} |\}}	| j|	|f||d}
|rqt|
}
|
jdkr^tj|
dd	n|
}
t|
|tjd
}
|rot|
dn|
}
|
S )a  
    Resizes `image` to `(height, width)` specified by `size` using the PIL library.

    Args:
        image (`np.ndarray`):
            The image to resize.
        size (`Tuple[int, int]`):
            The size to use for resizing the image.
        resample (`int`, *optional*, defaults to `PILImageResampling.BILINEAR`):
            The filter to user for resampling.
        reducing_gap (`int`, *optional*):
            Apply optimization by resizing the image in two steps. The bigger `reducing_gap`, the closer the result to
            the fair resampling. See corresponding Pillow documentation for more details.
        data_format (`ChannelDimension`, *optional*):
            The channel dimension format of the output image. If unset, will use the inferred format from the input.
        return_numpy (`bool`, *optional*, defaults to `True`):
            Whether or not to return the resized image as a numpy array. If False a `PIL.Image.Image` object is
            returned.
        input_data_format (`ChannelDimension`, *optional*):
            The channel dimension format of the input image. If unset, will use the inferred format from the input.

    Returns:
        `np.ndarray`: The resized image.
    r=   Nr   zsize must have 2 elementsF)r9   r.   )rX   rY   r>   r?   r   gp?)r   resizer   BILINEARrO   r!   r   r   rB   rC   r:   rA   r   rE   ndimexpand_dimsr*   r   r%   r1   )r   rJ   rX   rY   r,   rZ   r.   r9   rP   rQ   resized_imager(   r(   r)   r\     s*   !
r\   meanstdc                 C   s2  t | tjs
td|du rt| }t| |d}| j| }t| jtj	s+| 
tj} t |trCt||krBtd| dt| n|g| }tj|| jd}t |trht||krgtd| dt| n|g| }tj|| jd}|tjkr| | | } n| j| | j} |durt| ||} | S | } | S )a  
    Normalizes `image` using the mean and standard deviation specified by `mean` and `std`.

    image = (image - mean) / std

    Args:
        image (`np.ndarray`):
            The image to normalize.
        mean (`float` or `Iterable[float]`):
            The mean to use for normalization.
        std (`float` or `Iterable[float]`):
            The standard deviation to use for normalization.
        data_format (`ChannelDimension`, *optional*):
            The channel dimension format of the output image. If unset, will use the inferred format from the input.
        input_data_format (`ChannelDimension`, *optional*):
            The channel dimension format of the input image. If unset, will use the inferred format from the input.
    zimage must be a numpy arrayN)r.   zmean must have z$ elements if it is an iterable, got r-   zstd must have )r   r   r    r!   r   r
   rF   
issubdtyper-   floatingr/   float32r   rO   rE   r   r%   Tr*   )r   ra   rb   r,   r.   channel_axisnum_channelsr(   r(   r)   	normalize[  s6   





rj   c                 C   s  t tdg |durtdt |du rdn|}t| tjs'tdt	|  t|t
r2t|dkr6td|du r>t| }|durD|n|}t| tj|} t| tj\}}|\}}	t|t|	}}	|| d }
|
| }||	 d }||	 }|
dkr||kr|dkr||kr| d	|
|||f } t| |tj} | S t||}t|	|}| jdd
 ||f }tj| |d}|| d }|| }|| d }|| }| |d	||||f< |
|7 }
||7 }||7 }||7 }|d	td|
t||td|t||f }t||tj}|s
t|}|S )a  
    Crops the `image` to the specified `size` using a center crop. Note that if the image is too small to be cropped to
    the size given, it will be padded (so the returned result will always be of size `size`).

    Args:
        image (`np.ndarray`):
            The image to crop.
        size (`Tuple[int, int]`):
            The target size for the cropped image.
        data_format (`str` or `ChannelDimension`, *optional*):
            The channel dimension format for the output image. Can be one of:
                - `"channels_first"` or `ChannelDimension.FIRST`: image in (num_channels, height, width) format.
                - `"channels_last"` or `ChannelDimension.LAST`: image in (height, width, num_channels) format.
            If unset, will use the inferred format of the input image.
        input_data_format (`str` or `ChannelDimension`, *optional*):
            The channel dimension format for the input image. Can be one of:
                - `"channels_first"` or `ChannelDimension.FIRST`: image in (num_channels, height, width) format.
                - `"channels_last"` or `ChannelDimension.LAST`: image in (height, width, num_channels) format.
            If unset, will use the inferred format of the input image.
        return_numpy (`bool`, *optional*):
            Whether or not to return the cropped image as a numpy array. Used for backwards compatibility with the
            previous ImageFeatureExtractionMixin method.
                - Unset: will return the same type as the input image.
                - `True`: will return a numpy array.
                - `False`: will return a `PIL.Image.Image` object.
    Returns:
        `np.ndarray`: The cropped image.
    r=   Nz8return_numpy is deprecated and will be removed in v.4.33Tr   r   zOsize must have 2 elements representing the height and width of the output imager   .)rF   )r   center_cropwarningswarnFutureWarningr   r   r    r!   r"   r   rO   r   r*   r   r#   r   r5   r8   rF   
zeros_liker7   rA   )r   rJ   r,   r.   rZ   output_data_formatorig_height
orig_widthcrop_height
crop_widthtopbottomleftright
new_height	new_width	new_shape	new_imagetop_pad
bottom_padleft_pad	right_padr(   r(   r)   rl     sR   # 

.rl   bboxes_centerc                 C   sL   |  d\}}}}tj|d|  |d|  |d|  |d|  gdd}|S )Nr>         ?dimunbindtorchstack)r   center_xcenter_yrQ   rP   bbox_cornersr(   r(   r)   _center_to_corners_format_torch  s   *r   c                 C   sH   | j \}}}}tj|d|  |d|  |d|  |d|  gdd}|S )Nr   r>   r?   rg   r   r   r   r   r   rQ   rP   bboxes_cornersr(   r(   r)   _center_to_corners_format_numpy  s   *r   c                 C   sP   t j| dd\}}}}t j|d|  |d|  |d|  |d|  gdd}|S )Nr>   r?   r   tfunstackr   r   r(   r(   r)   _center_to_corners_format_tf  s   *r   c                 C   F   t | rt| S t| tjrt| S t| rt| S tdt	|  )a|  
    Converts bounding boxes from center format to corners format.

    center format: contains the coordinate for the center of the box and its width, height dimensions
        (center_x, center_y, width, height)
    corners format: contains the coodinates for the top-left and bottom-right corners of the box
        (top_left_x, top_left_y, bottom_right_x, bottom_right_y)
    Unsupported input type )
r   r   r   r   r    r   r   r   r!   r"   )r   r(   r(   r)   center_to_corners_format  s   r   r   c                 C   sD   |  d\}}}}|| d || d || || g}tj|ddS )Nr>   r   r   r   )r   
top_left_x
top_left_ybottom_right_xbottom_right_ybr(   r(   r)   _corners_to_center_format_torch.  s   

r   c                 C   s@   | j \}}}}tj|| d || d || || gdd}|S )Nr   r>   r?   r   r   r   r   r   r   r   r(   r(   r)   _corners_to_center_format_numpy9  s   

	r   c                 C   sH   t j| dd\}}}}t j|| d || d || || gdd}|S )Nr>   r?   r   r   r   r(   r(   r)   _corners_to_center_format_tfG  s   

	r   c                 C   r   )a  
    Converts bounding boxes from corners format to center format.

    corners format: contains the coordinates for the top-left and bottom-right corners of the box
        (top_left_x, top_left_y, bottom_right_x, bottom_right_y)
    center format: contains the coordinate for the center of the box and its the width, height dimensions
        (center_x, center_y, width, height)
    r   )
r   r   r   r   r    r   r   r   r!   r"   )r   r(   r(   r)   corners_to_center_formatU  s   
r   c                 C   s   t | tjr>t| jdkr>| jtjkr| tj} | dddddf d| dddddf   d| dddddf   S t	| d d| d   d| d   S )z*
    Converts RGB color to unique ID.
       Nr      r   i   r   )
r   r   r    rO   rF   r-   r3   r/   int32r5   )colorr(   r(   r)   	rgb_to_idl  s
   J$r   c                 C   s   t | tjr1|  }tt| jdg }tj|tjd}t	dD ]}|d |d|f< |d }q |S g }t	dD ]}|
| d  | d } q7|S )z*
    Converts unique ID to RGB color.
    r   rc   r   .)r   r   r    copyrM   rN   rF   zerosr3   rangeappend)id_mapid_map_copy	rgb_shapergb_mapir   _r(   r(   r)   	id_to_rgbw  s   

r   c                   @   s    e Zd ZdZdZdZdZdZdS )PaddingModezP
    Enum class for the different padding modes to use when padding images.
    constantreflect	replicate	symmetricN)__name__
__module____qualname____doc__CONSTANTREFLECT	REPLICATE	SYMMETRICr(   r(   r(   r)   r     s    r   g        paddingmodeconstant_valuesc                    s   du rt   fdd}||}|tjkr&||}tj |d|d n1|tjkr4tj |dd n#|tjkrBtj |dd n|tjkrPtj |d	d ntd
| |durct	 |  S    S )a  
    Pads the `image` with the specified (height, width) `padding` and `mode`.

    Args:
        image (`np.ndarray`):
            The image to pad.
        padding (`int` or `Tuple[int, int]` or `Iterable[Tuple[int, int]]`):
            Padding to apply to the edges of the height, width axes. Can be one of three formats:
            - `((before_height, after_height), (before_width, after_width))` unique pad widths for each axis.
            - `((before, after),)` yields same before and after pad for height and width.
            - `(pad,)` or int is a shortcut for before = after = pad width for all axes.
        mode (`PaddingMode`):
            The padding mode to use. Can be one of:
                - `"constant"`: pads with a constant value.
                - `"reflect"`: pads with the reflection of the vector mirrored on the first and last values of the
                  vector along each axis.
                - `"replicate"`: pads with the replication of the last value on the edge of the array along each axis.
                - `"symmetric"`: pads with the reflection of the vector mirrored along the edge of the array.
        constant_values (`float` or `Iterable[float]`, *optional*):
            The value to use for the padding if `mode` is `"constant"`.
        data_format (`str` or `ChannelDimension`, *optional*):
            The channel dimension format for the output image. Can be one of:
                - `"channels_first"` or `ChannelDimension.FIRST`: image in (num_channels, height, width) format.
                - `"channels_last"` or `ChannelDimension.LAST`: image in (height, width, num_channels) format.
            If unset, will use same as the input image.
        input_data_format (`str` or `ChannelDimension`, *optional*):
            The channel dimension format for the input image. Can be one of:
                - `"channels_first"` or `ChannelDimension.FIRST`: image in (num_channels, height, width) format.
                - `"channels_last"` or `ChannelDimension.LAST`: image in (height, width, num_channels) format.
            If unset, will use the inferred format of the input image.

    Returns:
        `np.ndarray`: The padded image.

    Nc                    s  t | ttfr| | f| | ff} nOt | tr,t| dkr,| d | d f| d | d ff} n3t | trCt| dkrCt | d trC| | f} nt | trXt| dkrXt | d trX| } ntd|  tjkrjdg| R ng | dR }  jdkr~dg| R } | S | } | S )za
        Convert values to be in the format expected by np.pad based on the data format.
        r   r   r   zUnsupported format: )r   r      )	r   r5   floatrM   rO   r!   r   r#   r^   )valuesr   r.   r(   r)   _expand_for_data_format  s   "$
$$z$pad.<locals>._expand_for_data_formatr   )r   r   r   )r   edger   zInvalid padding mode: )
r   r   r   r   padr   r   r   r!   r*   )r   r   r   r   r,   r.   r   r(   r   r)   r     s$   +



r   c                 C   s,   t tdg t| tjjs| S | d} | S )z
    Converts an image to RGB format. Only converts if the image is of type PIL.Image.Image, otherwise returns the image
    as is.

    Args:
        image (Image):
            The image to convert.
    r=   RGB)r   convert_to_rgbr   rB   rC   convert)r   r(   r(   r)   r     s
   	
r   c                 C   sx   |du rt | n|}|tjkr| ddddf } n|tjkr(| ddddf } ntd| |dur:t| ||d} | S )a  
    Flips the channel order of the image.

    If the image is in RGB format, it will be converted to BGR and vice versa.

    Args:
        image (`np.ndarray`):
            The image to flip.
        data_format (`ChannelDimension`, *optional*):
            The channel dimension format for the output image. Can be one of:
                - `ChannelDimension.FIRST`: image in (num_channels, height, width) format.
                - `ChannelDimension.LAST`: image in (height, width, num_channels) format.
            If unset, will use same as the input image.
        input_data_format (`ChannelDimension`, *optional*):
            The channel dimension format for the input image. Can be one of:
                - `ChannelDimension.FIRST`: image in (num_channels, height, width) format.
                - `ChannelDimension.LAST`: image in (height, width, num_channels) format.
            If unset, will use the inferred format of the input image.
    N.r>   zUnsupported channel dimension: r[   )r   r   r%   r#   r!   r*   )r   r,   r.   r(   r(   r)   flip_channel_order  s   

r   )N)NN)TNN)NNNTN)NNN)r   r;   r   r;   )r   r<   r   r<   )r   r;   r   r;   )r   r<   r   r<   )Arm   typingr   r   r   r   r   rD   r   image_utilsr   r	   r
   r   r   utilsr   r   r   r   r   utils.import_utilsr   r   r   r   r   rB   r   r   
tensorflowr   	jax.numpyjnpr    strr*   rf   r   r-   r1   r:   boolrA   r5   rM   rW   r\   rj   rl   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r(   r(   r(   r)   <module>   sH  	

*
&
7
F

K
?


c




 
W