o
    h^+                     @   sb  d dl mZmZ d dlZd dlm  mZ d dlmZ d dl	m
Z
 d dlmZ d dlmZ d dlmZmZ d dlmZ d d	lmZ d d
lmZmZmZ d dlmZmZ d dlmZmZ dZ ej!"e dZ#ej!"e ddZ$ej!"e ddZ%ej!"e ddZ&dd Z'd&ddZ(dej)fddZ*dej)fddZ+dd Z,dd Z-d d! Z.d"d# Z/e. Z0e/ Z1d$d% Z2dS )'    )OptionalTupleN)_prims)DispatchKey)autograd_not_implemented)HigherOrderOperator)CUDARngStateHelpermake_contiguous_strides_for)backwards_not_supported)FakeTensorMode)disable_proxy_modes_tracingProxyTorchDispatchModetrack_tensor_tree)_device_dtype)_get_current_dispatch_mode_pop_mode_temporarilyrngprimsDEFIMPLCompositeExplicitAutogradAutogradMetac                 C   s"   t d| j d| j d| j d)Nz"You are trying to functionalize a z RNG operator but zE does not use Philox/counter-based RNG. Therefore, functionalizing a zo RNG operator is not supported. We are discussing the possibility of a Philox-based RNG implementation for CPU.)RuntimeErrortype)device r   L/var/www/html/ai/venv/lib/python3.10/site-packages/torch/_prims/rng_prims.pythrow_on_non_cuda"   s
   r   c           	      C   s   t | t| | t| | ttjjj	| }|j
}|r!||_t| t| ||fD ]}||_tjjj|_||_||_||_q-d S N)rngprimdefinerngprim_implimplrngprim_meta_implgetattrtorch_opsopsr   default_tagsrngprim_autograd_implr
   __doc___prims_commonRETURN_TYPENEWreturn_typeschema	impl_atenprim_meta_impl)	namer1   r2   	impl_metadoctagsprim_packetprimpr   r   r   register_rng_prim*   s   
r;   shapec                 C   s   t tjdtjdS )Nr   dtype)r   
TensorLiker&   tensorint64)r<   r   r   r   philox_rand_offset_metaA      rB   c                 C   s   d}| D ]}||9 }qt j|t jd}d}d}d}t jt j }|j| }|| d | }	t|	|j| }	|d ||	 |  d | }
|
S )N   r=         )	r&   scalar_tensorrA   cudaget_device_propertiescurrent_devicemax_threads_per_multi_processorminmulti_processor_count)r<   numel_scalardim_sizenumel
block_sizeunrollcurand4_engine_callsdevice_propertyblocks_per_sm	grid_sizeoffsetr   r   r   philox_rand_offsetG   s   

rX   c                  C   s   d} d}dt jdt jdt jdtttdf  dtd	tfd
d}dt jdt jdt jdtttdf  dtd	tfdd}t| |||dt j	j
fd d S )Nphilox_randzphilox_rand(SymInt[] size, Tensor seed, Tensor offset, int[]? stride, Device? device=None, ScalarType? dtype=None) -> (Tensor, Tensor)r<   seedrW   stride.r   r>   c                 S   s6   |d u sJ t | }tj| |||d}t| }||fS )N)r<   stridesr>   r   )r	   r   
TensorMetarB   )r<   rZ   rW   r[   r   r>   random_valuesr   r   r   _philox_rand_metac   s   	z/register_philox_rand.<locals>._philox_rand_metac                 S   s   |d u sJ |j dkrg }n|g}|j dkrt|tj| t|| tj| ||d}W d    n1 s9w   Y  |t| fS )NcpurH   )r   r>   )	r   r   r&   randomfork_rngr   set_torch_state_tensorrandrX   )r<   rZ   rW   r[   r   r>   devicesr^   r   r   r   _philox_randt   s   	

z*register_philox_rand.<locals>._philox_randz$Philox based stateless rand operator)r4   r1   r2   r5   r6   r7   )r&   SizeTensorr   r   intr   r   r;   Tagnondeterministic_seeded)r4   r1   r_   rf   r   r   r   register_philox_rand_   sH   


rl   c                 C   sl   | dr| d}t|trt|}|jS dd | D }tdd |D r)dS tdd |D r4dS d S )	Nr   c                 S   s    h | ]}t |tjr|jjqS r   )
isinstancer&   rh   r   r   ).0argr   r   r   	<setcomp>   s     zget_device.<locals>.<setcomp>c                 s       | ]}|d kV  qdS )rH   Nr   rn   devr   r   r   	<genexpr>       zget_device.<locals>.<genexpr>rH   c                 s   rq   )r`   Nr   rr   r   r   r   rt      ru   r`   )getrm   strr&   r   r   any)argskwargsr   re   r   r   r   
get_device   s   



r{   c                     s   t dtj tj tj tj tj tj	t
dd tjdd tjdd tjfdd	 t fd
dtfdd} S )Nrun_and_save_rng_stateTdeferred_errorc                 _   s   t j | |i |fS r   )r&   rH   get_rng_stateopry   rz   r   r   r   	impl_cuda   s   z5register_run_and_save_rng_state_op.<locals>.impl_cudac                 _   s   t  | |i |fS r   )r&   r   r   r   r   r   impl_cpu   s   z4register_run_and_save_rng_state_op.<locals>.impl_cpuc                    sH    d}t ||}||v sJ d| || }|| g|R i |S N)rH   r`   zBackend not supported for r{   )r   ry   rz   impl_mapr   r#   r   r   r   r   impl_backend_select   s
   

z?register_run_and_save_rng_state_op.<locals>.impl_backend_selectc                    s    | g|R i |S r   r   r   )r   r   r   impl_fake_tensor_mode   rC   zAregister_run_and_save_rng_state_op.<locals>.impl_fake_tensor_modec                    s   t  }|d us	J t N}|jrH | g|R i |}t|jj| g|R }t|jj|}|jd||}t||d |jdW  d    S | g|R i |W  d    S 1 s]w   Y  d S Ncall_function)constanttracer)	r   r   enable_tracingpytreetree_mapr   unwrap_proxycreate_proxyr   )r   ry   rz   modeout
proxy_argsproxy_kwargs	out_proxy)r   r|   r   r   impl_proxy_dispatch_mode   s    
$zDregister_run_and_save_rng_state_op.<locals>.impl_proxy_dispatch_mode)r   fallthroughr   ADInplaceOrViewAutocastCPUAutocastCUDAPythonDispatcherPythonTLSSnapshotpy_implr   r   CUDACPUBackendSelectr   r   )r   r   )r   r   r   r   r|   r   "register_run_and_save_rng_state_op   s(   






r   c                     s   t dtj tj tj tj tj tj	t
dd tjdd tjdd  tfdd	} tj fd
d}tdd }S )Nrun_with_rng_stateTr}   c                 _   s8   t j }t j|   ||i |}t j| |S r   )r&   rH   r   set_rng_stater`   	rng_stater   ry   rz   current_stater   r   r   r   r      s
   
z1register_run_with_rng_state_op.<locals>.impl_cudac                 _   s.   t  }t |  ||i |}t | |S r   )r&   r   r   r   r   r   r   r      s
   

z0register_run_with_rng_state_op.<locals>.impl_cpuc           	   	      s   t  }|d us	J t d}|jr]t   | |g|R i |}W d    n1 s*w   Y  t|jj| |g|R }t|jj|}|jd ||}t	||d |jdW  d    S  | |g|R i |W  d    S 1 ssw   Y  d S r   )
r   r   r   r   r   r   r   r   r   r   )	r   r   ry   rz   r   r   r   r   r   )r   r   r   r      s(   
$z@register_run_with_rng_state_op.<locals>.impl_proxy_dispatch_modec                    sJ    d}t ||}||v sJ d| || }|| |g|R i |S r   r   )r   r   ry   rz   r   r   r#   r   r   r   r     s
   

z;register_run_with_rng_state_op.<locals>.impl_backend_selectc                 _   s   ||i |S r   r   )r   r   ry   rz   r   r   r   r     s   z=register_run_with_rng_state_op.<locals>.impl_fake_tensor_mode)r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   )r   r   r   r   )r   r   r   r   register_run_with_rng_state_op   s(   







r   c                   C   s
   t   d S r   )rl   r   r   r   r   register_rng_prims#  s   
r   r   )3typingr   r   r&   torch.utils._pytreeutils_pytreer   r   torch._Cr   torch._higher_order_ops.utilsr   
torch._opsr   torch._prims_commonr   r	   torch._prims_common.wrappersr
   torch._subclasses.fake_tensorr   "torch.fx.experimental.proxy_tensorr   r   r   torch.typesr   r   torch.utils._python_dispatchr   r   rngprim_namespacelibraryLibraryr    r"   r+   r$   r   r;   rg   rB   rX   rl   r{   r   r   r|   r   r   r   r   r   r   <module>   sF    


77C