"""
A set of basic tensor ops compatible with tpu, gpu, and multigpu
    N)update_wrapperwraps)AnyMapping   )PartialState   )!TORCH_DISTRIBUTED_OPERATION_TYPES)DistributedTypeTensorInformation)is_torch_distributed_availableis_torch_versionis_tpu_availableF)check_device)ReduceOpc                 C   s   t | tjS N)
isinstancetorchTensortensor r   Q/var/www/html/ai/venv/lib/python3.10/site-packages/accelerate/utils/operations.pyis_torch_tensor(   s   r   c              	   C   s2   t | tjjtjjtjjtjjtjjtjjtjj	S r   )
r   r   xpuFloatTensor
ByteTensor	IntTensor
LongTensor
HalfTensorDoubleTensorBFloat16Tensorr   r   r   r   is_torch_xpu_tensor,   s   r"   c                 C   s
   t | tS r   )r   r   tensor_infor   r   r   is_tensor_information9      
r%   c                 C   sV   t | }|j}t|dks|d tkrdS t|dd}t|ts"dS tdd |D S )z
    Checks if `data` is a `namedtuple` or not. Can have false positives, but only if a user is trying to mimic a
    `namedtuple` perfectly.
    """
    data_type = type(data)
    bases = data_type.__bases__
    if len(bases) != 1 or bases[0] != tuple:
        return False
    fields = getattr(data_type, "_fields", None)
    if not isinstance(fields, tuple):
        return False
    return all(isinstance(member, str) for member in fields)


def honor_type(obj, generator):
    """
    Cast a generator to the same type as obj (list, tuple, or namedtuple)
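
    Example (illustrative sketch of the type-preserving behavior):

        >>> from collections import namedtuple
        >>> Point = namedtuple("Point", "x y")
        >>> honor_type(Point(1, 2), (v * 2 for v in Point(1, 2)))
        Point(x=2, y=4)
        >>> honor_type([1, 2], (v * 2 for v in [1, 2]))
        [2, 4]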
    )r6   r,   list)obj	generatorr   r   r   
honor_typeL   s   r:   	test_typeerror_on_other_typec                   s   t |ttfrt| fdd|D S t |tr/t| fdd| D S |r>|g R i S rRtdt| dj dj d|S )	a9  
    Recursively apply a function on a data structure that is a nested list/tuple/dictionary of a given base type.

    Args:
        func (`callable`):
            The function to recursively apply.
        data (nested list/tuple/dictionary of `main_type`):
            The data on which to apply `func`
        *args:
            Positional arguments that will be passed to `func` when applied on the unpacked data.
        test_type (`callable`, *optional*, defaults to `is_torch_tensor`):
            A function returning `True` on the objects to which `func` should be applied (the `main_type` objects,
            tensors by default).
        error_on_other_type (`bool`, *optional*, defaults to `False`):
            Whether to raise an error or not if, after unpacking `data`, we encounter an object that does not pass
            `test_type`. If `False`, the function will leave objects of other types unchanged.
        **kwargs:
            Keyword arguments that will be passed to `func` when applied on the unpacked data.

    Returns:
        The same data structure as `data` with `func` applied to every object of type `main_type`.
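
    Example (illustrative sketch on a small nested structure):

        >>> import torch
        >>> nested = {"a": torch.tensor([1.0]), "b": [torch.tensor([2.0])]}
        >>> recursively_apply(lambda t: t * 2, nested)
        {'a': tensor([2.]), 'b': [tensor([4.])]}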
    c                 3   s.    | ]}t |g R d V  qdS )r;   Nrecursively_apply)r)   oargsr=   funckwargsr<   r   r   r+   p   s    
z$recursively_apply.<locals>.<genexpr>c                    s0   i | ]\}}|t |g R d qS )r;   r>   r)   kvrA   r   r   
<dictcomp>y   s    z%recursively_apply.<locals>.<dictcomp>zUnsupported types (z) passed to `z?`. Only nested list/tuple/dicts of objects that are valid for `z` should be passed.)	r   r/   r7   r:   r   r,   items	TypeError__name__)rC   r2   r<   r=   rB   rD   r   rA   r   r?   W   s,   
	r?   c                    s   t | ttfrt|  fdd| D S t | tr:t tr#gndu r)g t|  fdd|  D S t| drVz| j	 dW S  t
yU   | 	  Y S w | S )a  
    Recursively sends the elements in a nested list/tuple/dictionary of tensors to a given device.

    Args:
        tensor (nested list/tuple/dictionary of `torch.Tensor`):
            The data to send to a given device.
        device (`torch.device`):
            The device to send the data to.
        non_blocking (`bool`, *optional*, defaults to `False`):
            Whether to perform the copy asynchronously when possible.
        skip_keys (`str` or `list[str]`, *optional*):
            Mapping keys whose values should be left on their current device.

    Returns:
        The same data structure as `tensor` with all tensors sent to the proper device.
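
    Example (illustrative sketch; `"cpu"` stands in for any real device):

        >>> import torch
        >>> batch = {"input_ids": torch.ones(2, 3), "labels": [torch.zeros(2)]}
        >>> batch = send_to_device(batch, "cpu", skip_keys=["labels"])
        >>> batch["input_ids"].device
        device(type='cpu')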
    c                 3   s     | ]}t | d V  qdS )non_blocking	skip_keysNsend_to_device)r)   tdevicerM   rN   r   r   r+      s    z!send_to_device.<locals>.<genexpr>Nc              	      s.   i | ]\}}||v r|nt | d qS )rL   rO   )r)   rF   rQ   rR   r   r   rH      s    z"send_to_device.<locals>.<dictcomp>to)rM   )r   r/   r7   r:   r   r(   r,   rI   hasattrrT   rJ   )r   rS   rM   rN   r   rR   r   rP      s*   


rP   c                 C      dd }t || S )aK  
    Recursively gathers the information needed to rebuild a nested list/tuple/dictionary of tensors.

    Args:
        data (nested list/tuple/dictionary of `torch.Tensor`):
            The data to analyze.

    Returns:
        The same data structure as `data` with [`~utils.TensorInformation`] instead of tensors.
    """

    def _get_data_structure(tensor):
        return TensorInformation(shape=tensor.shape, dtype=tensor.dtype)

    return recursively_apply(_get_data_structure, data)


def get_shape(data):
    """
    Recursively gathers the shape of a nested list/tuple/dictionary of tensors as a list.

    Args:
        data (nested list/tuple/dictionary of `torch.Tensor`):
            The data to analyze.

    Returns:
        The same data structure as `data` with lists of tensor shapes instead of tensors.
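
    Example (illustrative sketch):

        >>> import torch
        >>> get_shape({"x": torch.ones(2, 3), "y": [torch.zeros(4)]})
        {'x': [2, 3], 'y': [[4]]}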
    """

    def _get_shape(tensor):
        return list(tensor.shape)

    return recursively_apply(_get_shape, data)


def initialize_tensors(data_structure):
    """
    Recursively initializes tensors from a nested list/tuple/dictionary of [`~utils.TensorInformation`].

    Returns:
        The same data structure as `data` with tensors instead of [`~utils.TensorInformation`].
    """

    def _initialize_tensor(tensor_info):
        return torch.empty(*tensor_info.shape, dtype=tensor_info.dtype)

    return recursively_apply(_initialize_tensor, data_structure, test_type=is_tensor_information)


def find_batch_size(data):
    """
    Recursively finds the batch size in a nested list/tuple/dictionary of lists of tensors.

    Args:
        data (nested list/tuple/dictionary of `torch.Tensor`): The data from which to find the batch size.

    Returns:
        `int`: The batch size.
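
    Example (illustrative sketch; the first entry of the structure determines the result):

        >>> import torch
        >>> find_batch_size({"input_ids": torch.ones(8, 512), "labels": torch.ones(8)})
        8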
    """
    if isinstance(data, (tuple, list, Mapping)) and (len(data) == 0):
        raise ValueError(f"Cannot find the batch size from empty {type(data)}.")

    if isinstance(data, (tuple, list)):
        return find_batch_size(data[0])
    elif isinstance(data, Mapping):
        for k in data.keys():
            return find_batch_size(data[k])
    elif not isinstance(data, torch.Tensor):
        raise TypeError(f"Can only find the batch size of tensors but got {type(data)}.")
    return data.shape[0]


def listify(data):
    """
    Recursively finds tensors in a nested list/tuple/dictionary and converts them to a list of numbers.

    Args:
        data (nested list/tuple/dictionary of `torch.Tensor`): The data from which to convert to regular numbers.

    Returns:
        The same data structure as `data` with lists of numbers instead of `torch.Tensor`.
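
    Example (illustrative sketch):

        >>> import torch
        >>> listify({"logits": torch.tensor([[0.5, 1.5]])})
        {'logits': [[0.5, 1.5]]}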
    """

    def _convert_to_list(tensor):
        tensor = tensor.detach().cpu()
        if tensor.dtype == torch.bfloat16:
            # NumPy does not support bfloat16, so convert to float32 before calling `tolist`
            tensor = tensor.to(torch.float32)
        return tensor.tolist()

    return recursively_apply(_convert_to_list, data)


def _tpu_gather(tensor):
    def _tpu_gather_one(tensor):
        if tensor.ndim == 0:
            tensor = tensor.clone()[None]

        # Can only gather contiguous tensors
        if not tensor.is_contiguous():
            tensor = tensor.contiguous()
        return xm.all_gather(tensor)

    res = recursively_apply(_tpu_gather_one, tensor, error_on_other_type=True)
    xm.mark_step()
    return res


def _gpu_gather(tensor):
    state = PartialState()
    if is_torch_version(">=", "1.13"):
        gather_op = torch.distributed.all_gather_into_tensor
    else:
        gather_op = torch.distributed._all_gather_base

    def _gpu_gather_one(tensor):
        if tensor.ndim == 0:
            tensor = tensor.clone()[None]

        # Can only gather contiguous tensors
        if not tensor.is_contiguous():
            tensor = tensor.contiguous()

        if state.device.type == "cuda" and tensor.device.type != "cuda":
            raise RuntimeError(
                "One or more of the tensors passed to `gather` were not on the GPU while the `Accelerator` is "
                "configured for CUDA. Please move it to the GPU before calling `gather`."
            )

        if state.backend is not None and state.backend != "gloo":
            # `all_gather_into_tensor` is more memory-efficient than `all_gather`, but it is
            # not supported by the gloo backend (a backend of `None` always means CPU)
            output_tensors = torch.empty(
                state.num_processes * tensor.numel(),
                dtype=tensor.dtype,
                device=state.device,
            )
            gather_op(output_tensors, tensor)
            return output_tensors.view(-1, *tensor.size()[1:])
        else:
            output_tensors = [torch.empty_like(tensor) for _ in range(state.num_processes)]
            torch.distributed.all_gather(output_tensors, tensor)
            return torch.cat(output_tensors, dim=0)

    return recursively_apply(_gpu_gather_one, tensor, error_on_other_type=True)


class DistributedOperationException(Exception):
    """
    An exception class for distributed operations. Raised if the operation cannot be performed due to the shape of the
    tensors.
    """


def verify_operation(function):
    """
    Verifies that `tensor` is the same shape across all processes. Only run if `PartialState().debug` is `True`.
    """

    @wraps(function)
    def wrapper(*args, **kwargs):
        if PartialState().distributed_type == DistributedType.NO or not PartialState().debug:
            return function(*args, **kwargs)
        operation = f"{function.__module__}.{function.__name__}"
        if "tensor" in kwargs:
            tensor = kwargs["tensor"]
        else:
            tensor = args[0]
        shapes = get_shape(tensor)
        output = gather_object([shapes])
        if output[0] is not None:
            are_same = output.count(output[0]) == len(output)
            if not are_same:
                process_shape_str = "\n  - ".join([f"Process {i}: {shape}" for i, shape in enumerate(output)])
                raise DistributedOperationException(
                    "Cannot apply desired operation due to shape mismatches. All shapes across devices must be valid."
                    f"\n\nOperation: `{operation}`\nInput shapes:\n  - {process_shape_str}"
                )
        return function(*args, **kwargs)

    return wrapper


def chained_operation(function):
    """
    Checks that `verify_operation` failed and if so reports a more helpful error chaining the existing
    `DistributedOperationException`.
    """

    @wraps(function)
    def wrapper(*args, **kwargs):
        try:
            return function(*args, **kwargs)
        except DistributedOperationException as e:
            operation = f"{function.__module__}.{function.__name__}"
            raise DistributedOperationException(
                f"Error found while calling `{operation}`. Please see the earlier error for more details."
            ) from e

    return wrapper


@verify_operation
def gather(tensor):
    """
    Recursively gather tensor in a nested list/tuple/dictionary of tensors from all devices.

    Args:
        tensor (nested list/tuple/dictionary of `torch.Tensor`):
            The data to gather.

    Returns:
        The same data structure as `tensor` with all tensors gathered from all processes.
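
    Example (two-process sketch; the output is what every process would see):

        >>> # process 0: tensor = torch.tensor([0., 1.]); process 1: tensor = torch.tensor([2., 3.])
        >>> gather(tensor)  # doctest: +SKIP
        tensor([0., 1., 2., 3.])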
    """
    if PartialState().distributed_type == DistributedType.TPU:
        return _tpu_gather(tensor)
    elif PartialState().distributed_type in TORCH_DISTRIBUTED_OPERATION_TYPES:
        return _gpu_gather(tensor)
    else:
        return tensor


def _gpu_gather_object(object: Any):
    output_objects = [None for _ in range(PartialState().num_processes)]
    torch.distributed.all_gather_object(output_objects, object)
    # `all_gather_object` returns a list of lists, flatten it into a single list
    return [x for y in output_objects for x in y]


def gather_object(object: Any):
    """
    Recursively gather object in a nested list/tuple/dictionary of objects from all devices.

    Args:
        object (nested list/tuple/dictionary of picklable object):
            The data to gather.

    Returns:
        The same data structure as `object` with all the objects sent to every device.
    """
    if PartialState().distributed_type == DistributedType.TPU:
        raise NotImplementedError("gather objects in TPU is not supported")
    elif PartialState().distributed_type in TORCH_DISTRIBUTED_OPERATION_TYPES:
        return _gpu_gather_object(object)
    else:
        return object


def _gpu_broadcast(data, src=0):
    def _gpu_broadcast_one(tensor, src=0):
        torch.distributed.broadcast(tensor, src=src)
        return tensor

    return recursively_apply(_gpu_broadcast_one, data, error_on_other_type=True, src=src)


def _tpu_broadcast(tensor, src=0, name="broadcast tensor"):
    if isinstance(tensor, (list, tuple)):
        return honor_type(tensor, (_tpu_broadcast(t, name=f"{name}_{i}") for i, t in enumerate(tensor)))
    elif isinstance(tensor, Mapping):
        return type(tensor)({k: _tpu_broadcast(v, name=f"{name}_{k}") for k, v in tensor.items()})
    return xm.mesh_reduce(name, tensor, lambda x: x[src])


@verify_operation
def broadcast(tensor, from_process: int = 0):
    """
    Recursively broadcast tensor in a nested list/tuple/dictionary of tensors to all devices.

    Args:
        tensor (nested list/tuple/dictionary of `torch.Tensor`):
            The data to broadcast.
        from_process (`int`, *optional*, defaults to 0):
            The process from which to send the data.

    Returns:
        The same data structure as `tensor` with all tensors broadcasted to the proper device.
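
    Example (two-process sketch; after the call, every process holds the values of process 0):

        >>> # process 0: tensor = torch.tensor([1., 2.]); process 1: tensor = torch.tensor([3., 4.])
        >>> broadcast(tensor, from_process=0)  # doctest: +SKIP
        tensor([1., 2.])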
    zaccelerate.utils.broadcast)r   r   r   )r   r   r
   r   r   r	   r   )r   r   r   r   r   r     s
   r   c                    s^   t  jtjkrt| D ]\}}td| fdd| |< q| S t  jtv r-tj	j
|  d | S )a  
    Broadcast a list of picklable objects form one process to the others.

    Args:
        object_list (list of picklable objects):
            The list of objects to broadcast. This list will be modified inplace.
        from_process (`int`, *optional*, defaults to 0):
            The process from which to send the data.

    Returns:
        The same list containing the objects from process `from_process`.
    """
    if PartialState().distributed_type == DistributedType.TPU:
        for i, obj in enumerate(object_list):
            object_list[i] = xm.mesh_reduce("accelerate.utils.broadcast_object_list", obj, lambda x: x[from_process])
    elif PartialState().distributed_type in TORCH_DISTRIBUTED_OPERATION_TYPES:
        torch.distributed.broadcast_object_list(object_list, src=from_process)
    return object_list


def slice_tensors(data, tensor_slice, process_index=None, num_processes=None):
    """
    Recursively takes a slice in a nested list/tuple/dictionary of tensors.

    Args:
        data (nested list/tuple/dictionary of `torch.Tensor`):
            The data to slice.
        tensor_slice (`slice`):
            The slice to take.

    Returns:
        The same data structure as `data` with all the tensors sliced.
    """

    def _slice_tensor(tensor, tensor_slice):
        return tensor[tensor_slice]

    return recursively_apply(_slice_tensor, data, tensor_slice)


def concatenate(data, dim=0):
    """
    Recursively concatenate the tensors in a nested list/tuple/dictionary of lists of tensors with the same shape.

    Args:
        data (nested list/tuple/dictionary of lists of tensors `torch.Tensor`):
            The data to concatenate.
        dim (`int`, *optional*, defaults to 0):
            The dimension on which to concatenate.

    Returns:
        The same data structure as `data` with all the tensors concatenated.
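
    Example (illustrative sketch; `data` is a list of same-structure batches):

        >>> import torch
        >>> batches = [{"x": torch.ones(2, 3)}, {"x": torch.ones(4, 3)}]
        >>> concatenate(batches, dim=0)["x"].shape
        torch.Size([6, 3])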
    """
    if isinstance(data[0], (tuple, list)):
        return honor_type(data[0], (concatenate([d[i] for d in data], dim=dim) for i in range(len(data[0]))))
    elif isinstance(data[0], Mapping):
        return type(data[0])({k: concatenate([d[k] for d in data], dim=dim) for k in data[0].keys()})
    elif not isinstance(data[0], torch.Tensor):
        raise TypeError(f"Can only concatenate tensors but got {type(data[0])}")
    return torch.cat(data, dim=dim)


class CannotPadNestedTensorWarning(UserWarning):
    pass


@chained_operation
def pad_across_processes(tensor, dim=0, pad_index=0, pad_first=False):
    """
    Recursively pad the tensors in a nested list/tuple/dictionary of tensors from all devices to the same size so they
    can safely be gathered.

    Args:
        tensor (nested list/tuple/dictionary of `torch.Tensor`):
            The data to pad.
        dim (`int`, *optional*, defaults to 0):
            The dimension on which to pad.
        pad_index (`int`, *optional*, defaults to 0):
            The value with which to pad.
        pad_first (`bool`, *optional*, defaults to `False`):
            Whether to pad at the beginning or the end.
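
    Example (two-process sketch; the shorter tensor is padded with `pad_index` up to the largest size):

        >>> # process 0: tensor of shape (2, 3); process 1: tensor of shape (2, 5)
        >>> pad_across_processes(tensor, dim=1).shape  # doctest: +SKIP
        torch.Size([2, 5])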
    """

    def _pad_across_processes(tensor, dim=0, pad_index=0, pad_first=False):
        if getattr(tensor, "is_nested", False):
            warnings.warn(
                "Cannot pad nested tensors without more information. Leaving unprocessed.",
                CannotPadNestedTensorWarning,
            )
            return tensor
        if dim >= len(tensor.shape):
            return tensor

        # Gather all sizes
        size = torch.tensor(tensor.shape, device=tensor.device)[None]
        sizes = gather(size).cpu()
        # Then pad to the maximum size
        max_size = max(s[dim] for s in sizes)
        if max_size == tensor.shape[dim]:
            return tensor

        old_size = tensor.shape
        new_size = list(old_size)
        new_size[dim] = max_size
        new_tensor = tensor.new_zeros(tuple(new_size)) + pad_index
        if pad_first:
            indices = tuple(
                slice(max_size - old_size[dim], max_size) if i == dim else slice(None) for i in range(len(new_size))
            )
        else:
            indices = tuple(slice(0, old_size[dim]) if i == dim else slice(None) for i in range(len(new_size)))
        new_tensor[indices] = tensor
        return new_tensor

    return recursively_apply(
        _pad_across_processes, tensor, error_on_other_type=True, dim=dim, pad_index=pad_index, pad_first=pad_first
    )


@verify_operation
def reduce(tensor, reduction="mean", scale=1.0):
    """
    Recursively reduce the tensors in a nested list/tuple/dictionary of lists of tensors across all processes using a
    given reduction operation.

    Args:
        tensor (nested list/tuple/dictionary of `torch.Tensor`):
            The data to reduce.
        reduction (`str`, *optional*, defaults to `"mean"`):
            A reduction method. One of `"mean"`, `"sum"`, or `"none"`.
        scale (`float`, *optional*):
            A default scaling value to be applied after the reduce, only valid on XLA.

    Returns:
        The same data structure as `data` with all the tensors reduced.
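
    Example (two-process sketch; every process ends up with the element-wise reduction):

        >>> # process 0: tensor = torch.tensor([1., 2.]); process 1: tensor = torch.tensor([3., 4.])
        >>> reduce(tensor, reduction="sum")  # doctest: +SKIP
        tensor([4., 6.])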
    """

    def _reduce_across_processes(tensor, reduction="mean", scale=1.0):
        state = PartialState()
        cloned_tensor = tensor.clone()
        if state.distributed_type == DistributedType.NO:
            return cloned_tensor
        if state.distributed_type == DistributedType.TPU:
            xm.all_reduce("sum", [cloned_tensor], scale)
        elif state.distributed_type.value in TORCH_DISTRIBUTED_OPERATION_TYPES:
            torch.distributed.all_reduce(cloned_tensor, ReduceOp.SUM)
        if reduction == "mean":
            cloned_tensor /= state.num_processes
        return cloned_tensor

    return recursively_apply(
        _reduce_across_processes, tensor, error_on_other_type=True, reduction=reduction, scale=scale
    )


def convert_to_fp32(tensor):
    """
    Recursively converts the elements of a nested list/tuple/dictionary of tensors in FP16/BF16 precision to FP32.

    Args:
        tensor (nested list/tuple/dictionary of `torch.Tensor`):
            The data to convert from FP16/BF16 to FP32.

    Returns:
        The same data structure as `tensor` with all tensors that were in FP16/BF16 precision converted to FP32.
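
    Example (illustrative sketch):

        >>> import torch
        >>> convert_to_fp32({"logits": torch.ones(2, dtype=torch.float16)})["logits"].dtype
        torch.float32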
    """

    def _convert_to_fp32(tensor):
        return tensor.float()

    def _is_fp16_bf16_tensor(tensor):
        return hasattr(tensor, "dtype") and tensor.dtype in (torch.float16, torch.bfloat16)

    return recursively_apply(_convert_to_fp32, tensor, test_type=_is_fp16_bf16_tensor)


class ConvertOutputsToFp32:
    """
    Decorator to apply to a function outputting tensors (like a model forward pass) that ensures the outputs in FP16
    precision will be converted back to FP32.

    Args:
        model_forward (`Callable`):
            The function whose outputs we want to treat.

    Returns:
        The same function as `model_forward` but with converted outputs.
    """

    def __init__(self, model_forward):
        self.model_forward = model_forward
        update_wrapper(self, model_forward)

    def __call__(self, *args, **kwargs):
        return convert_to_fp32(self.model_forward(*args, **kwargs))

    def __getstate__(self):
        raise pickle.PicklingError(
            "Cannot pickle a prepared model with automatic mixed precision, please unwrap the model with "
            "`Accelerator.unwrap_model(model)` before pickling it."
        )


def convert_outputs_to_fp32(model_forward):
    model_forward = ConvertOutputsToFp32(model_forward)

    def forward(*args, **kwargs):
        return model_forward(*args, **kwargs)

    # To act like a decorator so that it can be popped when doing `extract_model_from_parallel`
    forward.__wrapped__ = model_forward

    return forward


def find_device(data):
    """
    Finds the device on which a nested dict/list/tuple of tensors lies (assuming they are all on the same device).

    Args:
        data (nested list/tuple/dictionary of `torch.Tensor`): The data we want to know the device of.
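
    Example (illustrative sketch; plain CPU tensors here, any shared device would be reported the same way):

        >>> import torch
        >>> find_device({"a": [torch.ones(1)], "b": torch.zeros(2)})
        device(type='cpu')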
    """
    if isinstance(data, Mapping):
        for obj in data.values():
            device = find_device(obj)
            if device is not None:
                return device
    elif isinstance(data, (tuple, list)):
        for obj in data:
            device = find_device(obj)
            if device is not None:
                return device
    elif isinstance(data, torch.Tensor):
        return data.device