o
    h                     @   s`  U d dl Z d dlZd dlZd dlmZmZmZmZmZm	Z	m
Z
mZmZmZ d dlmZ d dlZd dlZd dlZd dlZd dlZd dlmZ d dlmZ d dlmZ d dlmZ d dlmZ d d	lm Z  z=d dl!Z!d dl"Z"d d
l#m$Z% e&d d dl'Zd dl(Zd dl)Zd dl*Zd dl+Zd dl,Zd dl-m.Z. d dl/m0Z0m1Z1 dZ2W n e3y   dZ2Y nw g dZ4de5fddZ6de
e7 fddZ8de7fddZ9dd Z:de7fddZ;e<e=Z>G dd deZ?d ej@jAddfd!d"ZBd#ej@jAddfd$d%ZCdee7d&f fd'd(ZDd ej@jAdeed&f fd)d*ZEd ej@jAdefd+d,ZFd ej@jAdee7d&f fd-d.ZGd/ee7d&f dee7d&f fd0d1ZHd2eejId&f ded3 fd4d5ZJd6eejId&f d7ed3 deejId&f fd8d9ZKd:edejIfd;d<ZLd=d>d?ee7d&f d@eejId&f dAed3 dBee7d&f dCeejId&f dDed3 dEe5deejId&f fdFdGZMd=d>d?ee7d&f d@eejId&f dAed3 dBee7d&f dCeejId&f dDed3 dEe5deejId&f fdHdIZNG dJdK dKZOe jPG dLdM dMZQee7ee7ee7ef f f ZReeSdN< 	 e jPddOeddPG dQdR dRZTeddPG dSdT dTZUeddPddUd ej@jAdVe	eeTee7ef f  fdWdXZVdS )Y    N)
AnyDictFinalListMappingOptionalSequenceSetTupleUnion)	TypeAlias)
FakeTensor)compatibility)FakeTensorProp)OperatorSupport)CALLABLE_NODE_OPS)_pytree)_pybind_state
onnxscript)fx_onnx_interpreter)_TORCH_DTYPE_TO_NUMPY_DTYPE(_TORCH_DTYPE_TO_ONNX_TENSOR_ELEMENT_TYPETF)is_onnxrt_backend_supportedtorch_compile_backendOrtExecutionProviderOrtBackendOptions
OrtBackendreturnc                   C   s   t S )a)  Returns ``True`` if ONNX Runtime dependencies are installed and usable
    to support TorchDynamo backend integration; ``False`` otherwise.

    Example::

        # xdoctest: +REQUIRES(env:TORCH_DOCTEST_ONNX)
        >>> import torch
        >>> if torch.onnx.is_onnxrt_backend_supported():
        ...     @torch.compile(backend="onnxrt")
        ...     def f(x):
        ...             return x * x
        ...     print(f(torch.randn(10)))
        ... else:
        ...     print("pip install onnx onnxscript-preview onnxruntime")
        ...
    )_SUPPORT_ONNXRT r   r   V/var/www/html/ai/venv/lib/python3.10/site-packages/torch/onnx/_internal/onnxruntime.pyr   C   s   r   c                   C   s   dgS )NCPUExecutionProviderr   r   r   r   r    _infer_default_epsW   s   r"   namec                 C   s    t j rt jj|  dS dS )zIf PyTorch is installed with CUDA support, this starts NVTX range.

    Check torch.cuda.nvtx.range_push's document for more details.
    N)torchcudais_availablenvtx
range_pushr#   r   r   r    _nvtx_range_push]   s   
r*   c                   C   s   t j rt jj  dS dS )zIf PyTorch is installed with CUDA support, this terminates NVTX range.

    Check torch.cuda.nvtx.range_pop's document for more details.
    N)r$   r%   r&   r'   	range_popr   r   r   r    _nvtx_range_popf   s   
r,   device_typec                 C   sB   | dkr	t j S | dkrt j S | dkrt j S td|  )Nr%   cpuortzUnsupported device type: )ORTC	OrtDevicer%   r.   npu
ValueError)r-   r   r   r    _get_ort_device_typeo   s   


r4   c                       s`   e Zd ZdZdee deeef f fddZde	ee
jjf de
jjdef fd	d
Z  ZS )OrtOperatorSupporta0  Operator support for ONNXRuntime backend.

    It has two-level of support decision. One is via support_dict and the other one
    is via extra_support_dict. The logic of using support_dict is implemented in
    OrtOperatorSupport and extra_support_dict is used by OperatorSupport.is_node_supported.
    support_dictextra_support_dictc                    s   t  | || _d S N)super__init___onnx_support_dict)selfr6   r7   	__class__r   r    r:      s   
zOrtOperatorSupport.__init__
submodulesnoder   c                    s   |j tvrdS |j dkr|j| jv rtd|jt|j dS td|jt|j t ||r>td|jt|j dS td|jt|j dS )NFcall_functionz0support_dict supports node.target: %s (type: %s)Tz7support_dict doesn't support node.target: %s (type: %s)z6extra_support_dict supports node.target: %s (type: %s)z>extra_support_dict doesn't supports node.target: %s (type: %s))	opr   targetr;   loggerwarningtyper9   is_node_supported)r<   r?   r@   r=   r   r    rG      s6   
z$OrtOperatorSupport.is_node_supported)__name__
__module____qualname____doc__r	   r   r   strr:   r   r$   nnModulefxNodeboolrG   __classcell__r   r   r=   r    r5      s    "r5   graph_modulec                 C   sh   | j }g }d}|jD ]}|jdkr|| |du r!|jdkr!|}q
|du r(dS |D ]}|| q*dS )z
    In torch.fx.Graph, placehoder is a special assignment node. If it's not
    executed in the beginning, it could overwrite values computed by upstream
    nodes.
    Nplaceholder)graphnodesrB   appendprepend)rS   rU   placeholdersfirst_not_placeholderr@   rT   r   r   r    _move_placeholder_to_front   s   


r[   	fx_modulec                 C   s  | j jD ]}t|jtjjr|jjtjj	j
krd}d}d}d}d|jv r.|jd tjkr.d}d|jv rC|jd |jd jd jkrCd}d|jvrJd}|jD ]}|dvrUd}qMt|jd	kru|ru|ru|ru|rud|jd i|_tjj	jj|_qtd
dd |jD  d|j q|   d S )NTlayoutFdevicer   valdtype)r]   r^   r`      zaaten._to_copy must be replaced with other ONNX-supported aten ops.                          args=c                 S   s   g | ]}|j qS r   )meta).0argr   r   r    
<listcomp>   s    z,_replace_to_copy_with_to.<locals>.<listcomp>z	, kwargs=)rU   rV   
isinstancerC   r$   _ops
OpOverloadoverloadpacketopsaten_to_copykwargsstridedargsrb   r^   lentor`   RuntimeError	recompile)r\   r@   is_default_layoutis_on_same_deviceis_castare_kwargs_supportedkwargr   r   r    _replace_to_copy_with_to   sN   


	ry   .c                  G   sP   g }| D ]}t |dr#|j}|jdkr|d q|jdkr#|d qt|S )zBReturn the first valid device (i.e., GPU or CPU) in argument list.r^   r%   CUDAExecutionProviderr.   r!   )hasattrr^   rF   rW   tuple)ro   epsrd   r^   r   r   r    _infer_ep_from_device   s   



r~   c                 C   sX   g }| j jD ]!}|jdkr't|dr"d|jv r"t|jd tjs"J || qt	|S )NrT   rb   r_   )
rU   rV   rB   r{   rb   rf   r$   TensorrW   r|   )rS   rY   r@   r   r   r    _extract_graph_module_inputs  s   

r   c                 C   s.   | j jD ]}|jdkr|jd   S qtd)zHCollect "val" fields from outputs metadata in this torch.fx.GraphModule.outputr   z2No output node found in this torch.fx.GraphModule.)rU   rV   rB   ro   r3   )rS   r@   r   r   r    _extract_graph_module_outputs  s
   
r   c                 C   s(   t t| \}}dd |D }t| S )z[Return the all valid devices (i.e., GPU or CPU) among outputs of this torch.fx.GraphModule.c                 S   s*   g | ]}t |d rd|jv r|jd qS )rb   r_   r{   rb   )rc   
output_argr   r   r    re      s    
z/_infer_ep_from_graph_module.<locals>.<listcomp>)r   tree_flattenr   r~   )rS   flattened_output_args_selected_output_argsr   r   r    _infer_ep_from_graph_module  s   r   r}   c                 C   s,   dt dtfdd}t| }tt||ddS )z:Sort execution providers in eps based on pre-set priority.epr   c                 S   s   | dkrdS | dkrdS dS )Nr!      rz   ra   r   r   )r   r   r   r    get_execution_provider_priority-  s
   z2_sort_eps.<locals>.get_execution_provider_priorityT)keyreverse)rL   intsetr|   sorted)r}   r   
unique_epsr   r   r    	_sort_eps*  s   r   valueszORTC.OrtDevice.c                    sJ   t fddD sJ ddtdtfdd t fddD }|S )	Nc                 3   s     | ]}|j  d  j kV  qdS )r   N)r^   rc   value)r   r   r    	<genexpr>=  s    
z$_get_onnx_devices.<locals>.<genexpr>z&All values must be on the same device.	device_idr   c                 S   s   | pdS )Nr   r   )r   r   r   r    _device_id_or_zeroA  s   z-_get_onnx_devices.<locals>._device_id_or_zeroc                 3   s4    | ]}t t|jjt j  |jjV  qd S r8   )r0   r1   r4   r^   rF   default_memoryindexr   )r   r   r    r   D  s    


)allr   r|   )r   devicesr   )r   r   r    _get_onnx_devices<  s   
r   tensorsr   c                 C   sn   t  }|t|  g }g }g }| D ]}|t|j  ||  ||  q|	| |||| |S r8   )
r0   OrtValueVectorreserverp   rW   r   r`   sizedata_ptrpush_back_batch)r   r   	ortvaluesdtypesshapes	data_ptrstensorr   r   r    !_get_ortvalues_from_torch_tensorsO  s   r   r   c                 C   s*   | j rtdtj|  | j| jd}|S )Nz#sparse tensor is not yet supported.)r`   r^   )	is_sparser3   r$   emptyr   r`   r^   )r   outr   r   r    _to_real_tensor`  s   r   sessonnxruntime.InferenceSessioninput_namesinputsinput_devicesoutput_namesoutputsoutput_devicespreallocate_outputc                 C   s   t d tdd |D }t  t d t||}|r*tdd |D }	t|	|}
nt }
t  t d t }|dd | 	|||||
| t  |rP|	S t d	 tj
jj|
}	t  |	S )
N
contiguousc                 s   s    | ]}|  V  qd S r8   )r   )rc   ar   r   r    r   r  s    z8_run_onnx_session_with_ortvaluevector.<locals>.<genexpr>r   c                 s   s&    | ]}t |trt|n|V  qd S r8   )rf   r   r   )rc   tr   r   r    r   }  s    
run_with_ortvaluevector'disable_synchronize_execution_providers1zafter run_with_ortvaluevector)r*   r|   r,   r   r0   r   onnxruntime
RunOptionsadd_run_config_entryr   training	ortmodule_utils_ortvalues_to_torch_tensor)r   r   r   r   r   r   r   r   
ort_inputspth_outputsort_outputsrun_optionsr   r   r    %_run_onnx_session_with_ortvaluevectorg  s6   


r   c                 C   s<   dd t ||D }| ||}	tdd t |	|D }
|
S )Nc                 S   s&   i | ]\}}|t j|  qS r   )r   OrtValueortvalue_from_numpyr.   numpy)rc   r#   r   r   r   r    
<dictcomp>  s    z0_run_onnx_session_with_fetch.<locals>.<dictcomp>c                 s   s&    | ]\}}t ||jV  qd S r8   )r$   
from_numpyrq   r^   )rc   r   r   r   r   r    r     s
    
z/_run_onnx_session_with_fetch.<locals>.<genexpr>)ziprunr|   )r   r   r   r   r   r   r   r   feedr   r   r   r   r    _run_onnx_session_with_fetch  s   
r   c                   @   sv   e Zd ZdZdddeedf ded deedf d	ed d
ed ded deeejdf ejf fddZ	dd Z
dS )OrtExecutionInfoPerSessionzWInformation required to execute torch.fx.GraphModule using onnxruntime.InferenceSessionsessionr   r   .input_value_infos)zonnx.ValueInfoProto.r   output_value_infosr   r   r   example_outputsc	           	      C   s4   || _ || _|| _|| _|| _|| _|| _|| _d S r8   r   r   r   r   r   r   r   r   )	r<   r   r   r   r   r   r   r   r   r   r   r    r:     s   z#OrtExecutionInfoPerSession.__init__c                 G   s   t |t | jkrdS t|| jD ]F\}}t|tjs dS t|j }||jj	j
kr- dS t|j|jj	jjD ]\}}t|trI|j|ksH|jrIq7t|tjrS|jrSq7  dS qdS NFT)rp   r   r   rf   r$   r   r   r`   rF   tensor_type	elem_typeshapedimr   	dim_value	dim_paramSymInt)r<   ro   rd   
value_info
onnx_dtyper   onnx_dimr   r   r    is_supported  s"   

	z'OrtExecutionInfoPerSession.is_supportedN)rH   rI   rJ   rK   r
   rL   r   r$   r   r:   r   r   r   r   r    r     s(    

	
!r   c                   @   s<   e Zd Zdd ZdejjfddZdejjdefddZ	d	S )
"OrtExecutionInfoForAllGraphModulesc                 C   s
   i | _ d S r8   )execution_info_per_graph_module)r<   r   r   r    r:     s   z+OrtExecutionInfoForAllGraphModules.__init__rS   c                 G   s8   || j vrd S | j | }|D ]}|j| r|  S qd S r8   )r   r   )r<   rS   ro   
candidates	candidater   r   r    &search_reusable_session_execution_info  s   


zIOrtExecutionInfoForAllGraphModules.search_reusable_session_execution_infoinfoc                 C   s.   || j vr|g| j |< d S | j | | d S r8   )r   rW   )r<   rS   r   r   r   r    cache_session_execution_info  s   
z?OrtExecutionInfoForAllGraphModules.cache_session_execution_infoN)
rH   rI   rJ   r:   r$   rO   GraphModuler   r   r   r   r   r   r    r     s    
r   r   )frozen)is_backward_compatiblec                   @   s   e Zd ZU dZdZeee  ed< 	 dZ	e
ed< 	 dZeee  ed< 	 dZe
ed< 	 dZe
ed	< 	 dZed
 ed< 	 dZed ed< dS )r   aJ  Options for constructing an ``OrtBackend``, the ONNX Runtime
    backend (``"onnxrt"``) for ``torch.compile``.

    Example::

        >>> @torch.compile(
        ...     backend="onnxrt",
        ...     options=torch.onnx._OrtBackendOptions(...),
        ... )
        ... def ort_function(x):
        ...     return x ** x
    Npreferred_execution_providersTinfer_execution_providersdefault_execution_providersFr   use_aot_autogradztorch.onnx.ExportOptionsexport_optionszonnxruntime.SessionOptionsort_session_options)rH   rI   rJ   rK   r   r   r   r   __annotations__r   rQ   r   r   r   r   r   r   r   r   r    r     s    
 	r   c                	   @   s   e Zd ZU dZddee fddZdejj	de
eeeeef f  fdd	Zdejj	fd
dZdejj	dejj	fddZdejj	dejj	fddZdZeed< g Zeed   ed< e	ddeeeeeef f  dd fddZedd Zedd ZdS )r   a	  A backend compiles (sub-)graphs in torch.fx.GraphModule to onnxruntime.InferenceSession calls.

    The compiler entry point is OrtBackend.compile, which
        1. partitions the original graph into supported sub-graphs (type: torch.fx.GrpahModule) and unsupported
           sub-graphs.
        2. For each supported sub-graph, it replaces its _wrapped_call function with _ort_accelerated_call.
        3. Inside _ort_accelerated_call, it creates onnxruntime.InferenceSession and calls it to execute the sub-graph.
    Noptionsc                 C   s   |d u rt  n|| _tjjj| jjd u rtj n| jj| _	tjjj
j| j	j}d d d}t||| _i | _t | _d| _d| _ttdrMt| _d S t| _d S )N)getattrz_operator.getitemFr   r   )r   _optionsr$   onnx	_internalexporterResolvedExportOptionsr   ExportOptions_resolved_onnx_exporter_optionsrO   decomposition_table'_create_onnx_supports_op_overload_tableonnx_registryr5   _supported_ops_partitioner_cacher   _all_ort_execution_info_assert_allclose_to_baselineexecution_countr{   r0   r   r   r   )r<   r   r6   r7   r   r   r    r:   ^  s.   

zOrtBackend.__init__rS   r   c                 G   s   t  }| jjrt|  }r|}nt| }r|}g }g | jjp g t|| jjp+t R D ]*}t	|t
r:|i f}nt	|t rK|d d u rK|d i f}|d urX||vrX|| q.|S )Nra   r   )r|   r   r   r~   r   r   r   r   r"   rf   rL   rW   )r<   rS   ro   inferred_epseps_from_argseps_from_graph_moduleselected_epsr   r   r   r    _select_eps  s,   




zOrtBackend._select_epsc              
   O   s  | j j|g|R  }|r|j}|j}|j}|j}|j}	|j}
ntj	j
jj| jj| }| jjrCd| _t|}dd }t||}
nzt|j|i |}
W n ty`   td| d| _ w tj| jjd}tj	j
jj| jj| }|j|| jj| jjd}|j | jj!j"d}t#j$|% | j&j'| j(|g|R  d}t)d	d
 |j*j+D }t)dd
 |j*j,D }t-|}t.|
t)rt-|
}	nt-|
f}	t/||t)dd
 |j*j+D |t)dd
 |j*j,D ||	|
d}| j 0|| |  j1d7  _1t.|
tj2}|r|
fn|
}t.|t)s	J t3dd
 |D sJ t4d | |||||||	| j&j}t5  | j6rVtj7j8j9|g|R ddi}|rB|fn|}t:||D ]\}}tj;<|| qI|r]|d S |S )a  This function replaces GraphModule._wrapped_call in compiled model.

        The _wrapped_call is the underlying implementation of forward method. Replacing
        it means we delegate the computation to _ort_acclerated_call and therefore
        onnxruntime.InferenceSession.
        Fc                 S   s"   t | drd| jv r| jd S | S )Nrb   r_   r   )r   r   r   r    maybe_map_to_meta_val  s   
z>OrtBackend._ort_acclerated_call.<locals>.maybe_map_to_meta_valzFakeTensorProb failed for %s)diagnostic_context)fx_graph_moduleonnxfunction_dispatcherop_level_debug)opset_version)path_or_bytessess_options	providersc                 s       | ]}|j V  qd S r8   r)   rc   inputr   r   r    r   1      z2OrtBackend._ort_acclerated_call.<locals>.<genexpr>c                 s   r  r8   r)   rc   r   r   r   r    r   2  r  c                 s       | ]}|V  qd S r8   r   r  r   r   r    r   ?      c                 s   r  r8   r   r  r   r   r    r   A  r  r   ra   c                 s   s    | ]	}t |tjV  qd S r8   )rf   r$   r   )rc   elemr   r   r    r   U  s    $run_onnx_session_with_ortvaluevectorexecutorrk   r   )=r  r   r   r   r   r   r   r   r$   r   r   rO   passesMovePlaceholderToFrontr   r  r   dynamic_shapesr   r   r   tree_mapr   	propagate	ExceptionrD   rE   r   FxOnnxInterpreterInsertTypePromotionr  r  to_model_protor  r  r   InferenceSessionSerializeToStringr   r   r  r|   rU   r  r   r   rf   r   r   r  r   r   r*   r,   r  _primsr  executer   testingassert_close)r<   rS   ro   rm   !cached_execution_info_per_sessiononnx_sessionr   r   r   r   prim_outputsextracted_outputsr  fx_interpreterexported
onnx_modelexecution_info_per_sessionis_single_tensor_outputnormalized_prim_outputsonnx_outputsbaseline_outputsnormalized_baseline_ouptutsonnx_outputbaseline_outputr   r   r    _ort_acclerated_call  s   





zOrtBackend._ort_acclerated_callc           	      C   s   ddl m} || jv r| j| }|S |}t| ||| jdd}| }|| j|< |jjD ]}|jdkrCd|j	v rCt
||j	}| j|_q-|S )Nr   )CapabilityBasedPartitionerT)allows_single_node_partitioncall_modulefused_)!torch.fx.passes.infra.partitionerr?  r  ry   r  partition_and_fuserU   rV   rB   r#   r   r>  _wrapped_call)	r<   rS   ro   r?  partitioned_prim_graph_moduleprim_graph_modulepartitionerr@   fused_moduler   r   r    compiler  s&   


zOrtBackend.compilec                 C   sF   | j jrddlm} ddlm} || j|| jjd||S | ||S )zIf ``OrtBackendOptions.use_aot_autograd`` is ``True``, the `auto_autograd` compiler
        will be invoked, wrapping this ``OrtBackend`` instance's ``compile`` method. Otherwise,
        the ``compile`` method is invoked directly.r   )#min_cut_rematerialization_partition)aot_autograd)fw_compilerpartition_fndecompositions)	r   r   functorch.compilerK  torch._dynamo.backends.commonrL  rJ  r   r   )r<   rS   ro   rK  rL  r   r   r    __call__  s   zOrtBackend.__call__   %_OrtBackend__instance_cache_max_count_OrtBackend__instance_cachec                    s   dt dt fddt t st di  pi  t fddtjD d}|du rJttjtjk s@J dtj d	t d
t dtjt  } |S )a  Returns a possibly cached instance of an ``OrtBackend``. If an existing
        backend was created previously through this function with the same options,
        it will be returned. Otherwise a new backend will be created, cached, and
        returned.

        Note: if ``options`` sets ``ort_session_options``, a new ``OrtBackend``
        will always be returned, since ``onnxruntime.SessionOptions`` cannot
        participate in caching.r   bc                 S   s   | j |j ks| j|jks| j|jks| j|jks| j|jkr dS | jd us*|jd ur,dS | j|ju r4dS | jd urf|jd urf| jj|jjkoe| jj|jjkoe| jj	|jj	koe| jj
|jj
u oe| jj|jju S dS r   )r   r   r   r   r   r   r   r"  r  diagnostic_optionsr  fake_context)r   rV  r   r   r    reusable  s2   z<OrtBackend.get_cached_instance_for_options.<locals>.reusablec                 3   s     | ]}|j  r|V  qd S r8   )r   )rc   rV  r   rY  r   r    r     s    z=OrtBackend.get_cached_instance_for_options.<locals>.<genexpr>NzNo more than z instances of z allowed. Please instantiate `z` explicitly to pass to `torch.compile`. See https://github.com/pytorch/pytorch/pull/107973#discussion_r1306144795 for discussion.r   )r   rf   nextr   rU  rp   rT  rW   )r   backendr   rZ  r    get_cached_instance_for_options  s"   
$
z*OrtBackend.get_cached_instance_for_optionsc                   C   s   t j  d S r8   )r   rU  clearr   r   r   r    clear_cached_instances  s   z!OrtBackend.clear_cached_instancesc                   C   s
   t tjS r8   )r|   r   rU  r   r   r   r    get_cached_instances  s   
zOrtBackend.get_cached_instancesr8   )rH   rI   rJ   rK   r   r   r:   r$   rO   r   r   r
   rL   r   r   r  r>  rJ  rR  rT  r   r   rU  r   staticmethodr   r]  r_  r`  r   r   r   r    r   S  s:   
 	K
! )9
F
r   )r   r   c                C   s   t || |S r8   )r   r]  )rS   ro   r   r   r   r    r     s   r   )Wdataclasses	importlibloggingtypingr   r   r   r   r   r   r   r	   r
   r   typing_extensionsr   r$   torch._C
torch._opstorch._prims.executortorch.fxtorch._subclasses.fake_tensorr   torch.fx._compatibilityr    torch.fx.passes.fake_tensor_propr    torch.fx.passes.operator_supportr   torch.fx.passes.tools_commonr   torch.utilsr   r   r   onnxruntime.capir   r0   import_module
torch.onnxtorch.onnx._internal torch.onnx._internal.diagnosticstorch.onnx._internal.exporter+torch.onnx._internal.fx.decomposition_tabletorch.onnx._internal.fx.passestorch.onnx._internal.fxr   "torch.onnx._internal.fx.type_utilsr   r   r   ImportError__all__rQ   r   rL   r"   r*   r,   r4   	getLoggerrH   rD   r5   rO   r   r[   ry   r~   r   r   r   r   r   r   r   r   r   r   r   	dataclassr   r   r   r   r   r   r   r   r   r    <module>   s   
 0
			
7-

" 


	
1

	
;$!
9   @