o
    h:                     @   sV   d dl mZmZmZmZmZ d dlZG dd dZG dd deZG dd deZ	dS )	    )AnyDictListOptionalTupleNc                   @   s   e Zd ZdZ	ddejdejdedeee	e
f  deejejf f
dd	Zddee defddZdee fddZddedee defddZdS )Cachezf
    Base, abstract class for all caches. The actual data structure is specific to each subclass.
    N
key_statesvalue_states	layer_idxcache_kwargsreturnc                 C      t d)a  
        Updates the cache with the new `key_states` and `value_states` for the layer `layer_idx`.

        Parameters:
            key_states (`torch.Tensor`):
                The new key states to cache.
            value_states (`torch.Tensor`):
                The new value states to cache.
            layer_idx (`int`):
                The index of the layer to cache the states for.
            cache_kwargs (`Dict[str, Any]`, `optional`):
                Additional arguments for the cache subclass. These are specific to each subclass and allow new types of
                cache to be created.

        Return:
            A tuple containing the updated key and value states.
        z.Make sure to implement `update` in a subclass.NotImplementedErrorselfr   r	   r
   r    r   N/var/www/html/ai/venv/lib/python3.10/site-packages/transformers/cache_utils.pyupdate   s   zCache.updater   c                 C   r   )YReturns the sequence length of the cached states. A layer index can be optionally passed.z6Make sure to implement `get_seq_length` in a subclass.r   r   r
   r   r   r   get_seq_length%      zCache.get_seq_lengthc                 C   r   )zJReturns the maximum sequence length of the cached states, if there is any.z6Make sure to implement `get_max_length` in a subclass.r   r   r   r   r   get_max_length)   r   zCache.get_max_lengthnew_seq_lengthc                 C   s2   |   }| |}|dur|| |kr|| S |S )zTGiven the sequence length of the new inputs, returns the usable length of the cache.N)r   r   )r   r   r
   
max_lengthprevious_seq_lengthr   r   r   get_usable_length-   s
   
zCache.get_usable_lengthNr   )__name__
__module____qualname____doc__torchTensorintr   r   strr   r   r   r   r   r   r   r   r   r   r      s"    	
 r   c                   @   s  e Zd ZdZdddZdedeeej	  fddZ
d	d
 Zdd Z	d dej	dej	dedeeeef  deej	ej	f f
ddZd!dee defddZdee fddZdejfddZdeeej	 eej	 f fddZed deeeej   dd fddZdS )"DynamicCachea  
    A cache that grows dynamically as more tokens are generated. This is the default for generative models.

    It stores the Key and Value states as a list of tensors, one for each layer. The expected shape for each tensor is
    `[batch_size, num_heads, seq_len, head_dim]`.
    r   Nc                 C   s   g | _ g | _d| _d S Nr   )	key_cachevalue_cacheseen_tokensr   r   r   r   __init__A   s   
zDynamicCache.__init__r
   c                 C   s8   |t | k r| j| | j| fS tdt |  d| )z
        Support for backwards-compatible `past_key_value` indexing, e.g. `past_key_value[0][0].shape[2]` to get the
        sequence length.
        zCache only has z. layers, attempted to access layer with index )lenr+   r,   KeyErrorr   r   r   r   __getitem__F   s   zDynamicCache.__getitem__c                 c   s.    t t| D ]}| j| | j| fV  qdS )z
        Support for backwards-compatible `past_key_value` iteration, e.g. `for x in past_key_value:` to iterate over
        keys and values
        Nranger/   r+   r,   r   r   r   r   __iter__P   s   zDynamicCache.__iter__c                 C   s
   t | jS )z
        Support for backwards-compatible `past_key_value` length, e.g. `len(past_key_value)`. This value corresponds
        to the number of layers in the model.
        )r/   r+   r   r   r   r   __len__X   s   
zDynamicCache.__len__r   r	   r   c                 C   s   |dkr|  j |jd 7  _ t| j|kr"| j| | j| ntj| j| |gdd| j|< tj| j| |gdd| j|< | j| | j| fS )a  
        Updates the cache with the new `key_states` and `value_states` for the layer `layer_idx`.

        Parameters:
            key_states (`torch.Tensor`):
                The new key states to cache.
            value_states (`torch.Tensor`):
                The new value states to cache.
            layer_idx (`int`):
                The index of the layer to cache the states for.
            cache_kwargs (`Dict[str, Any]`, `optional`):
                Additional arguments for the cache subclass. No additional arguments are used in `DynamicCache`.

        Return:
            A tuple containing the updated key and value states.
        r   dim)r-   shaper/   r+   appendr,   r%   catr   r   r   r   r   _   s   zDynamicCache.updater   c                 C   "   t | j|kr	dS | j| jd S r   r   r6   r/   r+   r9   r   r   r   r   r      s   zDynamicCache.get_seq_lengthc                 C   s   dS )zfReturns the maximum sequence length of the cached states. DynamicCache does not have a maximum length.Nr   r   r   r   r   r      s   zDynamicCache.get_max_lengthbeam_idxc                 C   l   t t| jD ],}| j| j}| j| d||| j|< | j| j}| j| d||| j|< qdS zDReorders the cache for beam search, given the selected beam indices.r   Nr3   r/   r+   deviceindex_selecttor,   r   r?   r
   rC   r   r   r   reorder_cache       zDynamicCache.reorder_cachec                 C   s4   d}t t| D ]}|| j| | j| ff7 }q|S )zXConverts the `DynamicCache` instance into the its equivalent in the legacy cache format.r   r2   )r   legacy_cacher
   r   r   r   to_legacy_cache   s   zDynamicCache.to_legacy_cachepast_key_valuesc                 C   s>   |  }|durt t|D ]}|| \}}|||| q|S )zNConverts a cache in the legacy cache format into an equivalent `DynamicCache`.N)r3   r/   r   )clsrK   cacher
   r   r	   r   r   r   from_legacy_cache   s   zDynamicCache.from_legacy_cache)r   Nr   r    )r!   r"   r#   r$   r.   r'   r   r   r%   r&   r1   r4   r5   r   r   r(   r   r   r   r   
LongTensorrG   rJ   classmethodFloatTensorrN   r   r   r   r   r)   9   s0    


%"(r)   c                   @   s   e Zd ZdZdededdfddZedd	 Zd
ej	dej	dej	dej	fddZ
d
ej	dej	dej	deej	ej	f fddZddee defddZdee fddZ	dd
ej	dej	dedeeeef  deej	ej	f f
ddZdejfddZdS ) 	SinkCachea  
    A cache that as described in the [Attention Sinks paper](https://arxiv.org/abs/2309.17453). It allows the model to
    generate beyond the length of its context window, without losing fluency in the conversation. As it discards past
    tokens, the model will lose the ability to generate tokens that depend on the context that was discarded.

    It stores the Key and Value states as a list of tensors, one for each layer. The expected shape for each tensor is
    `[batch_size, num_heads, seq_len, head_dim]`.

    Parameters:
        window_length (`int`):
            The length of the context window.
        num_sink_tokens (`int`):
            The number of sink tokens. See the original paper for more information.
    window_lengthnum_sink_tokensr   Nc                 C   s(   g | _ g | _|| _|| _i | _d| _d S r*   )r+   r,   rS   rT   cos_sin_cacher-   )r   rS   rT   r   r   r   r.      s   
zSinkCache.__init__c                 C   sH   | dd | j d d f }| d| j d d d f }tj| |fddS )N.   r7   )r9   r%   r;   )xx1x2r   r   r   _rotate_half   s   zSinkCache._rotate_halfr   cossinc                 C   s   || |  ||  }|S r   )r[   )r   r   r\   r]   rotated_key_statesr   r   r   _apply_key_rotary_pos_emb   s   z#SinkCache._apply_key_rotary_pos_embc           
      C   s   |j d | jvrk|tj}|tj}|| j|j d  d  }|| j|j d   }|| j|j d  d  }|| j|j d   }|| ||  }| | ||  }	||jd|	|jdf| j|j d < | j|j d  S )Nr6   r   )r9   rU   rE   r%   float32rT   dtype	unsqueeze)
r   r   r\   r]   original_cosshifted_cosoriginal_sinshifted_sinrerotation_cosrerotation_sinr   r   r   _get_rerotation_cos_sin   s   z!SinkCache._get_rerotation_cos_sinr   r
   c                 C   r<   r=   r>   r   r   r   r   r      s   zSinkCache.get_seq_lengthc                 C   s   | j S )z9Returns the maximum sequence length of the cached states.)rS   r   r   r   r   r      s   zSinkCache.get_max_lengthr	   r   c                 C   s:  | d}| d}| d}|duo|du}|dkr%|  j|jd 7  _t| j|kr9| j| | j| n|jd | | | jk ret	j
| j| |gdd| j|< t	j
| j| |gdd| j|< n| j| dddd| j | j |jd  df }	|r| ||d| j |d| j \}
}|dur|	dd|f |	d|df }	}| |	|
|}	|durt	j
|	|fd	d}	| j| ddddd| jf }t	j
||	|gdd| j|< | j| ddddd| jf }| j| dddd| j | j |jd  df }t	j
|||gdd| j|< | j| | j| fS )
a;  
        Updates the cache with the new `key_states` and `value_states` for the layer `layer_idx`.

        Parameters:
            key_states (`torch.Tensor`):
                The new key states to cache.
            value_states (`torch.Tensor`):
                The new value states to cache.
            layer_idx (`int`):
                The index of the layer to cache the states for.
            cache_kwargs (`Dict[str, Any]`, `optional`):
                Additional arguments for the cache subclass. The following arguments can be used in `SinkCache`: `sin`,
                `cos` and `partial_rotation_size`. These arguments are used with models using RoPE, to recompute the
                rotation as the tokens are shifted.

        Return:
            A tuple containing the updated key and value states.
        r]   r\   partial_rotation_sizeNr   r6   r7   .rV   )getr-   r9   r/   r+   r:   r,   r   rS   r%   r;   rT   ri   r_   )r   r   r	   r
   r   r]   r\   rj   
using_ropekeys_to_keeprg   rh   	keys_pass	sink_keyssink_valuesvalues_to_keepr   r   r   r      sD   


 (""(zSinkCache.updater?   c                 C   r@   rA   rB   rF   r   r   r   rG   <  rH   zSinkCache.reorder_cacher    r   )r!   r"   r#   r$   r'   r.   staticmethodr[   r%   r&   r_   r   ri   r   r   r   r   r(   r   r   rO   rG   r   r   r   r   rR      sL    


	
OrR   )
typingr   r   r   r   r   r%   r   r)   rR   r   r   r   r   <module>   s
    3o