o
    h~[                     @   s  d dl Z d dlZd dlZd dlZd dlZd dlZd dlZd dlZd dlZd dl	m
Z
 d dlmZ d dlmZmZmZmZ d dlZd dlmZ d dlZd dlmZ d dlmZ d dlmZ d dlmZmZ d	d
lm Z  d	dlm!Z!m"Z" e#e$Z%edZ&edZ'e'( Z)e)rd dl*Z+g Z,dZ-e)rg dZ,e+j.j/j01 2ddZ3d4dd e,D Z-g dZ5G dd dZ6dd Z7dZ8G dd dZ9e:ddd Z;dd d!d"Z<d#d$ Z=d%d& Z>G d'd( d(e?Z@d)d* ZAdGd+d,ZB	dHddd-d.d/ZCd0d1 ZDd2d3 ZEd4d5 ZF	dHddd-d6d7ZGd8eeeH  d9eeH d:eeH fd;d<ZId=e&d:eee& ge&f fd>d?ZJeJejKZLeJeMd@ZNeJd ZOeJdZPeJdZQG dAdB dBZRG dCdD dDZSG dEdF dFZTdS )I    N)Counter)import_module)CallableOptionalSequenceTypeVar)rand_strided)is_float_dtype)StorageWeakRef)ContentStoreReaderContentStoreWriter   )config)clone_inputsget_debug_dirTztorch._inductor.config )z1//caffe2/torch/fb/sparsenn:sparsenn_operators_gpuz-//caffe2/torch/fb/sparsenn:sparsenn_operatorsz///deeplearning/fbgemm/fbgemm_gpu:sparse_ops_cpuz+//deeplearning/fbgemm/fbgemm_gpu:sparse_opszfbcode://
c                 C      g | ]}d | dqS )ztorch.ops.load_library("z") .0xr   r   O/var/www/html/ai/venv/lib/python3.10/site-packages/torch/_dynamo/debug_utils.py
<listcomp>0       r   )buck2runz@mode/dev-nosanc                   @   s&   e Zd Zdd Zdd Zd	ddZdS )
BuckTargetWriterc                 C   s   t jt j|\| _| _| jdd| _| jdd d| j | _| j| jdd  | _| jdd  | _| j}||dd  dd  }d| d	| j | _	d S )
Nz.pyr   /.zfbcode.   zfbcode/r   :)
ospathsplitabspathsubdirpy_filereplacetargetfindcmd_line_path)selffilenametmpr   r   r   __init__7   s   zBuckTargetWriter.__init__c                 C   sD   d dd tD }td| j d| j dt d| d| j d	S )
Nr   c                 S   r   )z	        "z",r   r   r   r   r   r   F   r   z*BuckTargetWriter.build.<locals>.<listcomp>za
load("@fbcode_macros//build_defs:python_binary.bzl", "python_binary")

python_binary(
    name="z",
    srcs = ["z"],
    compile = False,
    deps = [
        "//caffe2:torch",
        "//caffe2/functorch:functorch",
        "//triton:triton",
        "z",
    ],
    cpp_deps = [
z
    ],
    main_module = "z",
)
)join
extra_depstextwrapdedentr+   r)   
cur_targetr%   )r.   extra_cpp_depsr   r   r   buildE   s   zBuckTargetWriter.buildTc                 C   sn   t j| jd}t|d}||   W d    n1 sw   Y  t| jg }|r5t	
dd| |S )NTARGETSwzFFound an example that reproduces the error. Run this cmd to repro - %s )r$   r%   r2   r(   openwriter8   BUCK_CMD_PREFIXr-   logwarning)r.   	print_msgtarget_filefd	cmd_splitr   r   r   r=   ]   s   zBuckTargetWriter.writeN)T)__name__
__module____qualname__r1   r8   r=   r   r   r   r   r   6   s    r   c                  C   sL   t jt d} | d u rt  dt  } t j| s$t j	| dd | S )Nminifierz
/minifier_T)exist_ok)
r$   r%   r2   r   tempfile
gettempdirgetpassgetuserexistsmakedirs)r%   r   r   r   minifier_dirk   s   rP      c                   @   s   e Zd Zejjejjejjejjejj	ejj
ejjejjejjejjejjejjejjejjejjejjejjejjejjejjejjgZedd Zedd ZdS )NNModuleToStringc                 C   sL   t  }|  D ]\}}t|tjvr|| qt|dkr$td| dS )Nr   z-We have not tested reprs of some modules - %sT)	setnamed_childrentyperR   
safe_reprsaddlenr?   r@   )gmcant_convert_moduler   r   r   can_convert_to_string   s   
z&NNModuleToString.can_convert_to_stringc                 C   s  ddl m} d}td}|  D ]+\}}|  }t| d }|d ur-|jr-| d}||d  d| d| d	7 }q| j	
 D ]X\}}	|	d u rKqB|	 tkrcdd
lm}
 |
jtks^J t|	}n t|	rvdt|	j d|	j d}ndt|	j d|	j d}|	jr| d}||d  d| d| d7 }qB| j
 D ].\}}|d u rqd}|jrd}dt|j d|j | d}||d  d| d| d	7 }q||| jd d	7 }|S )Nr   )
_addindent    z
            from torch.nn import *
            class Repro(torch.nn.Module):
                def __init__(self):
                    super().__init__()
            z.cuda()   zself.z = r   )
PRINT_OPTSztorch.randn(z, dtype=)ztorch.randint(1, size=zself.register_buffer('z', z)
r   z, device="cuda"ztorch.nn.Parameter(torch.randn(z))rQ   )torch.nn.modules.moduler^   r4   r5   rT   __repr__next
parametersis_cuda_buffersitemsnumelMAX_CONSTANT_NUMEL_INLINEtorch._tensor_strra   	thresholdreprtorchis_floating_pointlistshapedtype_parameterscode)rY   r^   tab	model_strmodule_namer\   
module_strexample_parambuffer_namebufferra   
tensor_str
param_nameparammaybe_devicer   r   r   convert   sF   	

 


  	zNNModuleToString.convertN)rE   rF   rG   ro   nnLinearConv1dConv2dConv3dBatchNorm1dBatchNorm2dBatchNorm3d	LayerNormDropoutSoftmaxReLUGELUIdentity	MaxPool2d	EmbeddingTanhConvTranspose1dGLULSTMFlattenAdaptiveAvgPool2drV   staticmethodr]   r   r   r   r   r   rR   w   s6    
rR   c                  C   s   t j sdS d} z%tjddgtjd}|j d}d	dd	 |D }| | d7 } W n t
y;   | d
7 } Y nw tdd tt j D }| d7 } | D ]\}}| d| d| d7 } qR| d7 } | S )Nz:# torch.cuda.is_available()==False, no GPU info collected
z# CUDA Info: 
nvccz	--version)stdoutr   r   c                 S   s    g | ]}|d vrd| dqS ))r   #  
r   )r   sr   r   r   r      s     z-_cuda_system_info_comment.<locals>.<listcomp>z# nvcc not found
c                 s   s    | ]	}t j|V  qd S N)ro   cudaget_device_name)r   ir   r   r   	<genexpr>   s    
z,_cuda_system_info_comment.<locals>.<genexpr>z# GPU Hardware Info: 
r   z : r   )ro   r   is_available
subprocessr   PIPEr   decoder&   r2   FileNotFoundErrorr   rangedevice_countri   )rw   cuda_version_outcuda_version_linescomment	gpu_namesnamecountr   r   r   _cuda_system_info_comment   s&   
r   F)stable_outputc                 C   sH   dd l }dd l}| rdS d|jj  d|jj  d|jj  dS )Nr   z*# config omitted due to stable_output=TruezYimport torch._dynamo.config
import torch._inductor.config
import torch._functorch.config
r   )torch._functorch.configtorch._inductor.config_dynamor   codegen_config	_inductor
_functorch)r   ro   r   r   r   generate_config_string   s   


r   c                   C   s   t jt dS )Nzminifier_launcher.py)r$   r%   r2   rP   r   r   r   r   get_minifier_repro_path  s   r   c              
   C   s   t  }td| trt|  zt|d}||  W d    W d S 1 s)w   Y  W d S  tyF } z
t| t	d|d }~ww )NzWriting minified repro to:
%sr:   z(Could not write to {minified_repro_path})
r   r?   r@   use_buckr   r=   r<   OSError	exceptionNotImplementedError)contentsminified_repro_pathrC   er   r   r   helper_for_dump_minify	  s   &

r   c                   @   s   e Zd ZdS )AccuracyErrorN)rE   rF   rG   r   r   r   r   r     s    r   c                 C   sB   t | }tt| D ]}t|| tjr|| | | j q
|S )z
    This clone inputs is different from utils clone_input. In case of minifier,
    all the tensors are leaf tensors while creating a new graph. So, we set the
    requires_grad field w/o checking the leafness of the tensor.
    )r   r   rX   
isinstancero   Tensorrequires_grad_requires_grad)example_inputscloned_inputsidxr   r   r   clone_inputs_retaining_gradness  s   r   c                 C   s   ddl m} ddlm}m}m} t| } |st|}t	| dr%| 
d t| dd}t| d	d}	t	| d
sP|dus>|	durP|| } |durI|| _|	durP|	| _| |}
|rX|
S ||
rd||
}|  || |
d|S )z
    Runs a forward and possibly backward iteration for a given mod and args.

    When disable_clone is True, we will use args as-is without cloning.
    This is higher fidelity but we may destroy the args in the process.
    r   )make_boxed_funcr   )collect_resultsreduce_to_scalar_lossrequires_bwd_pass	zero_gradTnamed_parametersNnamed_buffers_boxed_call)torch._functorch.aot_autogradr   testingr   r   r   copydeepcopyr   hasattrr   getattrr   r   backward)rY   argsonly_fwddisable_cloner   r   r   r   orig_named_parametersorig_named_buffersoutlossr   r   r   run_fwd_maybe_bwd)  s.   



r   require_fp64ignore_non_fpc             
   C   s(  ddl m} ddlm}m} ddlm}	 t| |r#|| | _|| | _	t||r2|||_|||_	t
| ||}
d}tjrfztt| t|\}}t
|||}W n tye   |r^tdtd Y nw zt
|||}W n ty } ztd W Y d}~dS d}~ww |	|
||tjd|d	}|S )
aa  
    Check two models have same accuracy.

    require_fp64: if True, raise an error if we unable to calculate the fp64 reference
    ignore_non_fp: if True, do not compare outputs which are not floating point.  This
        is mostly useful for the minifier (which wants to avoid quantizing floating point
        error into integer/boolean error)
    r   )OptimizedModule)"named_buffers_for_optimized_module%named_parameters_for_optimized_module)sameNzCould not generate fp64 outputszWhile minifying the program in accuracy minification mode, ran into a runtime exception which is likely an unrelated issue. Skipping this graph.T)tol	equal_nanr   )
eval_framer   r   r   r   utilsr   r   r   r   r   r   same_two_models_use_fp64cast_to_fp64r   r   r   	ExceptionRuntimeErrorr?   r@   r   repro_tolerance)rY   opt_gmr   r   r   r   r   r   r   r   reffp64_ref
fp64_modelfp64_examplesresr   passingr   r   r   same_two_modelsP  sP   






r   c                 C   s   | j jD ]1}|jdkr5|jtjjjjkr5t	|j
dksJ t|j
d r5|j
d tjkr5|j
d tjf|_
q| j   |   | S )Ncall_functionr`   r   r   )graphnodesopr+   ro   opsprimsconvert_element_typedefaultrX   r   r	   float64lint	recompile)modelnoder   r   r   !cast_convert_element_type_to_fp64  s   

r  c                    sB   ddl m} | } tjkrt|}| fdd|}||fS )Nr   )tree_mapc                    s"   t | tjr|  r|  S | S r   )r   ro   r   rp   tor   rs   r   r   <lambda>  s
   
zcast_to.<locals>.<lambda>)torch.utils._pytreer  r	  ro   r  r  )rs   r  inputsr  r   r  r   cast_to  s   


r  c                 C   s   t tj| |S r   )r  ro   r  )r  r  r   r   r   r     s   r   c             
   C   s^   z|t | t|}t| |||||d W S  ty. } ztd W Y d }~dS d }~ww )Nr   zWhile minifying the program in accuracy minification mode, ran into a runtime exception which is likely an unrelated issue. Skipping this graphF)r   r   r   r   r   r?   r   )rY   r   compiler_fnr   r   r   compiled_gmr   r   r   r   backend_accuracy_fails  s&   	
r  striderr   returnc                C   s   | d ur| S t |S r   )r   make_contiguous_strides_for)r  rr   r   r   r   _stride_or_default  s   r  dc                    s    fddS )Nc                    s   | d ur| S  S r   r   r
  r  r   r   r    s    z_mk_defaulter.<locals>.<lambda>r   r  r   r  r   _mk_defaulter  s   r  cpuc                   @   s4   e Zd Zdd ZdddddZdd Zd	d
 ZdS )NopInputReaderc                 C   s
   d| _ d S )Nr   total)r.   r   r   r   r1     s   
zNopInputReader.__init__Ndevice
dtype_hintc                C   s   |  j d7  _ d S )Nr   r  )r.   storage_hashnbytesr  r   r   r   r   storage  s   zNopInputReader.storagec                 O      d S r   r   r.   r   kwargsr   r   r   tensor     zNopInputReader.tensorc                 O   r$  r   r   r%  r   r   r   symint  r(  zNopInputReader.symintrE   rF   rG   r1   r#  r'  r)  r   r   r   r   r    s
    r  c                   @   sL   e Zd ZdddddZdddddZ	ddddddd	d
Zdd ZdS )InputReaderN)pbarc                C   s8   |d u r	t d |d urt|nd | _g | _|| _d S )Nz0no save_dir specified, will generate random data)r?   r@   r   storer   r,  )r.   save_dirr,  r   r   r   r1     s
   

zInputReader.__init__r  c                C   s   | j d ur| j d t|}t|}| jd ur=|d ur=z| j|}W n	 ty-   Y nw ||jkr;t	d||j |S t	d| ||j
 f}td |d}t|||| S )Nr   zdevice mismatch: %s != %sz1could not load %s, generating random data insteadrr   )r,  update_device_or_default_dtype_or_defaultr-  read_storager   r  r?   r@   itemsizer  r   untyped_storage)r.   r!  r"  r  r   r#  rr   r  r   r   r   r#    s"   

zInputReader.storage)storage_offsetrs   r   is_leafc          
      K   s  t ||d}t|}t|}t|}t|}tjg ||j|d}	t  |		|||| W d    n1 s7w   Y  |sut
  |	jtjd}	W d    n1 sTw   Y  t  |		|||| W d    n1 spw   Y  tjj|	|ksJ tj|	| | j|	 |	S )Nr/  )rs   r  r   )memory_format)r  _storage_offset_or_defaultr2  _is_leaf_or_default_requires_grad_or_defaultro   r'  r  no_gradset_enable_gradclonepreserve_format_subclasses
meta_utilssafe_is_leaf_utilsset_tensor_metadatar   append)
r.   r#  rr   r  r6  rs   r   r7  metadatatr   r   r   r'  %  s,   



zInputReader.tensorc                 C   s   | j | |S r   )r   rF  )r.   valr   r   r   r)  F  s   zInputReader.symintr   r*  r   r   r   r   r+    s    !r+  c                   @   sL   e Zd ZddddZdd Zdddd	efd
dZdddZdddZdS )InputWriterFstable_hashc                C   s:   g | _ t | _|| _|d urt||dnd | _i | _d S )NrK  )_lines	itertoolsr   storage_counterr.  r   r-  seen_storages)r.   r.  rL  r   r   r   r1   X  s   

zInputWriter.__init__c                 C   s*   dg}| dd | jD  |d |S )Nzdef load_args(reader):c                 s   s    | ]}d | V  qdS )r_   Nr   )r   lr   r   r   r   h  s    z$InputWriter.lines.<locals>.<genexpr>zload_args._version = 0)extendrM  rF  )r.   rr   r   r   linesd  s
   
zInputWriter.linesNr   device_hintr  c             
   C   s   t |}| j|}|d ur|S dt| j }d}td t|kr'd|}d}|j}|jdkr9|d us7J |}td |krDd|}|	 }	d }
| j
d ur[|jjdkr[| j
|}
| j| d|
d|	| | d || j|< |S )	Nbufr   z, dtype_hint=metaz	, device=z = reader.storage(, rb   )r
   rP  getre   rO  r2  r  rU   r1  r"  r-  write_storagerM  rF  )r.   r5  r   rV  wsvmaybe_dtype_hintr   r  r"  r!  r   r   r   r#  s  s0   



zInputWriter.storagec              	   C   s4  | j | |j|jd}g }td |jd| kr$|tt	|  t
d |jkr4|d|j td | krF|d|  tj|}|rZ|dd | D  td |jkrj|d|j tjj|}td |kr|d| | jd	d
|tt	|jg| d|   d S )NrU  r/  zdtype=zstorage_offset=c                 s   s"    | ]\}}| d |V  qdS )=Nr   )r   kr]  r   r   r   r     s     z%InputWriter.tensor.<locals>.<genexpr>zrequires_grad=zis_leaf=zreader.tensor(rY  )  # )r#  r5  rs   r  r  rr   r  rF  strtupler2  r9  r6  ro   rD  get_tensor_metadatarR  ri   r;  r   rA  rB  rC  r:  rM  r2   )r.   r   rH  r#  r   tensor_metadatar7  r   r   r   r'    s2   zInputWriter.tensorc                 C   s0   t |tjr
|jj}| jd|d|  d S )Nzreader.symint(ra  )r   ro   SymIntr  hintrM  rF  )r.   r   rI  r   r   r   r)    s   zInputWriter.symint)r  N)	rE   rF   rG   r1   rT  rb  r#  r'  r)  r   r   r   r   rJ  W  s    
rJ  )FF)F)Ur   	functoolsrL   rN  loggingr$   r   rJ   r4   collectionsr   	importlibr   typingr   r   r   r   ro   torch._prims_common_prims_commonr   torch._subclasses.meta_utilstorch._dynamo.testingr   r	    torch.multiprocessing.reductionsr
   torch.utils._content_storer   r   r   r   r   r   	getLoggerrE   r?   r   inductor_config	is_fbcoder   libfb.py.build_infolibfbr3   extra_importspy
build_info	BuildInfoget_build_ruler*   r6   r2   r>   r   rP   rk   rR   	lru_cacher   r   r   r   r   r   r   r   r   r  r  r   r  intr  r  float32r2  r  r1  r9  r;  r:  r  r+  rJ  r   r   r   r   <module>   s    
5	d

+E)

 
T