o
    h"                     @   s   d dl Z d dlZd dlZd dlZd dlZd dlZd dlZd dlZd dlZd dl	Z	d dl
Z
d dlZd dlZd dlZd dlZd dlZd dlZd dlZd dlZd dlZd dlZd dlZd dlZd dlZd dlmZ d dlmZmZmZ d dlmZ d dlm Z  d dlm!Z! d dlm"Z" d dl
m#Z# d d	lm$Z$ d d
l%m&Z&m%Z% d dl'm(Z(m)Z)m*Z*m+Z+m,Z,m-Z- d dl.Z.d dl/m0Z0m1Z1m2Z2 d dl3m4Z4 d dl5m6Z6m7Z7 e	j89e:Z;e	j8<e	j8<e;Z=e0> rd dl?m@Z@ d dlAmBZB d dlCmDZDmEZEmFZF ndd ZDdd ZEdd ZFdZGd aHdaIdd ZJdd ZKeLeMZNeOddd ZPdd  ZQG d!d" d"ZRG d#d$ d$eRZSG d%d& d&eRZTd'd( ZUdd*eVfd+d,ZWdd-eVd.eVd/eVfd0d1ZXdd3e-eVeYf d*eVd4eVfd5d6ZZ	)	2	)dd3e-eVeYf d.eVd*eVd4eVd/eVf
d7d8Z[d9eVd3e-eVeYf fd:d;Z\ej]G d<d= d=Z^d>e^fd?d@Z_dAdB Z`eOdCdDdE ZadFdG ZbdHdI ZceOddJdK ZdG dLdM dMZeej]G dNdO dOeeZfej]G dPdQ dQeeZgG dRdS dSeeZheh Zief eg gZjeOddTdU ZkdVdW ZlddYdZZmdd[d\Znd]d^ Zod_d` Zpdadb Zqdcdd Zrdedf Zsdgdh ZteOddidj ZueOddkdl ZvdmeidmdmfdneefdodpZwdXdXdmeidmdmfdneefdqdrZxG dsdt dtZyG dudv dvZzejOdwdx Z{dydz Z|dd|d}Z}G d~d dZ~G dd dZG dd dZG dd dZdd Zdd ZG dd dZG dd dZe  dS )    N)bisect_right)FutureProcessPoolExecutorThreadPoolExecutor)cdll)field)partial)abc)Path)Thread)sleeptime)AnyCallableDictListSetUnion)configcuda_propertiesexc)developer_warning)	_Faketqdmtqdm)build_paths)_run_build_command)log_global_cache_statslog_global_cache_valsuse_global_cachec                  O      d S N argskwargsr!   r!   O/var/www/html/ai/venv/lib/python3.10/site-packages/torch/_inductor/codecache.pyr   8      r   c                  O   r   r    r!   r"   r!   r!   r%   r   ;   r&   r   c                   C      dS NFr!   r!   r!   r!   r%   r   >   r&   r   iX  c                   C   s   t d u r	t a d S d S r    )_t0r   r!   r!   r!   r%   _compile_startI   s   
r*   c                  C   s&   t d urt } t| t  7 ad a d S d S r    )r)   r   _cumulative_compile_time)t1r!   r!   r%   _compile_endO   s
   r-   c                  C   s<   t jd} | d u rt  dt  } t j| dd | S )NTORCHINDUCTOR_CACHE_DIRz/torchinductor_Texist_ok)osenvirongettempfile
gettempdirgetpassgetusermakedirs)	cache_dirr!   r!   r%   r9   [   s
   r9   c                 C   sz   t jjd u rdn
dt jjdd }dtjj tjj }| d| }tj	
t |}tj	
|| }tj|dd |S )	Ncpucu. py_Tr/   )torchversioncudareplacesysversion_infomajorminorr1   pathjoinr9   r8   )namecu_strpython_versionbuild_foldercpp_wrapper_dircpp_wrapper_build_directoryr!   r!   r%   cpp_wrapper_cache_dird   s   rP   c                   @   sf   e Zd Zeeddd Zeeddd Zeeddd Zdd	 Z	d
d Z
dd ZdS )	CacheBaseNc                  C   s   z	dd l } | j}W n ty   d }Y nw dtjtj jitjj|ddtj	jj
jid}ttj|ddd |d	< |S )
Nr   rJ   )rB   triton
allow_tf32)devicerA   otherT)	sort_keysutf-8hash)rR   __version__ModuleNotFoundErrorr@   rB   get_device_propertiescurrent_devicerJ   rA   backendsmatmulrS   hashlibsha256jsondumpsencode	hexdigest)rR   triton_versionsystemr!   r!   r%   
get_systemt   s.   
zCacheBase.get_systemc                   C   s   t tjt dt d S )NcacherX   )r
   r1   rH   rI   r9   rQ   rg   r!   r!   r!   r%   get_local_cache_path   s   zCacheBase.get_local_cache_pathc                   C   s*   t jd urttjt jt d S d S )NrX   )r   global_cache_dirr
   r1   rH   rI   rQ   rg   r!   r!   r!   r%   get_global_cache_path   s
   
zCacheBase.get_global_cache_pathc                 C   s0   t j sd S t | _t | _t | _	d S r    )
r@   rB   is_availablerQ   rg   rf   ri   local_cache_pathrk   global_cache_pathselfr!   r!   r%   __init__   s
   


zCacheBase.__init__c                 C   sP   | j  si S t| j }t|}W d    |d S 1 sw   Y  |d S Nrh   )rm   is_fileopenra   load)rp   local_cache_fplocal_cacher!   r!   r%   get_local_cache   s   

zCacheBase.get_local_cachec                 C   sD   t j| jjst j| jjdd t| jtj| j	|ddd d S )NTr/   )rf   rh      )indent)
r1   rH   existsrm   parentr8   write_atomicra   rb   rf   )rp   rw   r!   r!   r%   update_local_cache   s   zCacheBase.update_local_cache)__name__
__module____qualname__staticmethod	functools	lru_cacherg   ri   rk   rq   rx   r~   r!   r!   r!   r%   rQ   s   s    	rQ   c                   @   s4   e Zd Zdee fddZdee defddZdS )
LocalCachekeysc                 G   s0   |   }|}|D ]}||v r|| }q d S |S r    )rx   )rp   r   rh   	sub_cachekeyr!   r!   r%   lookup   s   
zLocalCache.lookupvaluec                G   sL   |   }|}|dd D ]}||i  || }q|||d < | | d S )Nr   )rx   
setdefaultr~   )rp   r   r   rh   r   r   r!   r!   r%   	set_value   s   
zLocalCache.set_valueN)r   r   r   r   strr   r   r   r!   r!   r!   r%   r      s    r   c                   @   s>   e Zd Zeddd Zdededeege	f fddZ
dS )	PersistentCacheNc                 C   sZ   | j d u s
| j  si S t| j }t|}W d    |d S 1 s$w   Y  |d S rr   )rn   rs   rt   ra   ru   )rp   global_cache_fpglobal_cacher!   r!   r%   get_global_cache   s   
z PersistentCache.get_global_cacherJ   inputs	benchmarkc                    s   t t| j}t t| j}i d fdd	}tjs"tjrq|  }||sot r5|| 	 |dso D ]"}	||	|	< |
i  | 
i  |	 |  |	 < q7| | t rofdd D }
||
 S t r||| 	 |d S )a  
        Check to see if we have benchmarked the given choice callers. For each
        choice caller:

            1. Check global_cache[name][inputs][choice], return benchmark if cached.
            2. Check local_cache[name][inputs][choice], return benchmark if cached.
            3.
                a. `max_autotune_gemm=True`: benchmark the choice, update
                    local_cache[name][inputs][choice], and return the benchmark.
                b. `max_autotune_gemm=False`: don't benchmark the choice, return nothing.
        Nc                    sZ   d} D ]}|  }|| i i v r!|   | |< qd} |r+||d |S )z2Check if `cache` contains data for all the choicesTF)cached)hash_keyr3   )rh   callbackhitchoicechoice_hashchoicesr   rJ   timingsr!   r%   check_cache   s   
z+PersistentCache.lookup.<locals>.check_cache)r   c                    s   i | ]	}|   | qS r!   )r   ).0r   )r   r!   r%   
<dictcomp>  s    z*PersistentCache.lookup.<locals>.<dictcomp>r    )r   r   rf   r   r   max_autotunemax_autotune_gemmrx   r   r   r   r   r~   )rp   r   rJ   r   r   	log_statslog_valsr   rw   r   timings_to_logr!   r   r%   r      s4   

zPersistentCache.lookup)r   r   r   r   r   r   r   r   r   floatr   r!   r!   r!   r%   r      s    
r   c                  C   s.   t jt d} t j| st j| dd | S )NlocksTr/   )r1   rH   rI   r9   r{   r8   )lock_dirr!   r!   r%   get_lock_dir   s   r   r=   extrac                 C   sH   | }|dkr|d | }dt t|d d d d  S )Nr=   z||crW   3   )base64	b32encoder_   r`   rc   digestdecodelower)coder   hashing_strr!   r!   r%   	code_hash'  s    r   basename	extensionspecified_dirc                 C   sb   |rt j|r|}nt jt |}nt jt | dd }t j||  d| }| ||fS )N      r<   )r1   rH   isabsrI   r9   )r   r   r   subdirrH   r!   r!   r%   get_path3  s   
r   r   content	hash_typec                 C   s:   |dv sJ d|dkrt | |S |dkrt t| S d S )N)r   cubinzHash type not supportedr   r   )r   repr)r   r   r   r!   r!   r%   get_hash?  s   
r   c           	      C   sV   t | ||}t|||\}}}tj|stj|dd tj|s't||  ||fS )NTr/   )r   r   r1   rH   r{   r8   r}   )	r   r   r   r   r   r   r   r   rH   r!   r!   r%   writeG  s   
r   rH   c                 C   s   t |ttfsJ dt| } | jdt  dt	  d }t |tr'dnd}|
|}|| W d    n1 s>w   Y  ||  d S )Nz6Only strings and byte arrays can be saved in the cacher<   z.tmpwwb)
isinstancer   bytespathlibr
   r|   r1   getpid	threading	get_identrt   r   rename)rH   r   tmp_path
write_modefr!   r!   r%   r}   W  s   
 r}   c                   @   s   e Zd ZU dZdZeed< dZeed< dZe	ed< dZ
e	ed< dZeed< eedZee	 ed	< eedZee ed
< eedZee	 ed< eedZee ed< dZeed< defddZdd ZdS )CompiledFxGraphz!Class holding a compiled FX graphNcompiled_artifactcurrent_callable	cache_keyartifact_pathcache_linemap)default_factorydevice_typesdevice_idxsmutated_inputsmutated_input_idxs_boxed_callreturnc                 C   s   |   |S r    )get_current_callable)rp   r   r!   r!   r%   __call__u     zCompiledFxGraph.__call__c                 C   s"   | j d u rttt| S | j S r    )r   r   r   _run_from_cacheweakrefproxyro   r!   r!   r%   r   x  s   
z$CompiledFxGraph.get_current_callable)r   r   r   __doc__r   r   __annotations__r   r   r   r   r   r   r   setr   r   r   intr   listr   r   boolr   r   r   r!   r!   r!   r%   r   e  s   
 r   compiled_graphc                 C   sF   | j d u rddlm} || j| j| jd ur| jndj| _ |  |S )Nr   )PyCodeCacher!   )r   	codecacher   load_by_key_pathr   r   r   call)r   r   r   r!   r!   r%   r     s   


r   c                  C   sH   t  rt S tt jjttfrtt jj} t	| S t jjf} t	| S r    )
r   	is_fbcoder   gccr   cppcxxr   tuplecpp_compiler_search)searchr!   r!   r%   cpp_compiler  s   
r   r   c                 C   s   | D ]V}zG|d u r@t jdkrW qtdsW qddlm} t }|tj|dt	d}| t
 }W d    n1 s;w   Y  t|dg |W   S  tjttfyX   Y qw t )NlinuxTORCH_INDUCTOR_INSTALL_GXXr   FileLockzg++.locktimeout	--version)rD   platformr1   getenvfilelockr   r   rH   rI   LOCK_TIMEOUTinstall_gcc_via_conda
subprocesscheck_outputSubprocessErrorFileNotFoundErrorImportErrorr   InvalidCxxCompiler)r   r   r   r   lockr!   r!   r%   r     s*   


r   c               	   C   s   t jt d} t j| dd}t j|sBtd t jdd}|du r+t	
d}|durBtj|dd	|  d
ddddgtjd |S )z>On older systems, this is a quick way to get a modern compilerr   binzg++zDownloading GCC via conda	CONDA_EXEcondaNcreatez	--prefix=z--channel=conda-forgez--quietz-yz
python=3.8gxx)stdout)r1   rH   rI   r9   r{   loginfor2   r3   shutilwhichr  
check_callPIPE)prefixcxx_pathr
  r!   r!   r%   r     s*   


r   c                   C   s   t dt S )Nz(gcc|g\+\+))rer   r   r!   r!   r!   r%   is_gcc  s   r  c                  C   s*   t  } t| dgd}d| d v S )Nr   utf8Appler   )r   r  r  r   
splitlines)r   version_stringr!   r!   r%   is_apple_clang  s   r  c                   @   s   e Zd ZU eed< eed< eed< eejef ed< dZ	dZ
dd Zejfd	ejfd
dZdd Zdd ZdefddZeddd ZdS )VecISA
_bit_width_macro_arch_flags_dtype_nelementsa[  
#if defined(CPU_CAPABILITY_AVX512) || defined(CPU_CAPABILITY_AVX2)
#include <ATen/cpu/vec/functional.h>
#include <ATen/cpu/vec/vec.h>
#endif

__attribute__((aligned(64))) float in_out_ptr0[16] = {0.0};

extern "C" void __avx_chk_kernel() {
    auto tmp0 = at::vec::Vectorized<float>(1);
    auto tmp1 = tmp0.exp();
    tmp1.store(in_out_ptr0);
}
zG
import torch
from ctypes import cdll
cdll.LoadLibrary("__lib_path__")
c                 C      | j S r    )r  ro   r!   r!   r%   	bit_width     zVecISA.bit_widthdtypec                 C   s
   | j | S r    )r!  )rp   r%  r!   r!   r%   	nelements  s   
zVecISA.nelementsc                 C   r"  r    )r  ro   r!   r!   r%   build_macro  r$  zVecISA.build_macroc                 C   r"  r    )r   ro   r!   r!   r%   build_arch_flags  r$  zVecISA.build_arch_flagsr   c                 C   s   t t| S r    )rX   r   ro   r!   r!   r%   __hash__  r   zVecISA.__hash__Nc           	      C   s   t jjd ur
t jjS ttjd\}}ddlm} t }|t	j
||d td}|\ |d d d }tt||d| d	}z%t||| tjtjd
tjd|gtji t	jddtj
id W n ty| } zW Y d }~W d    dS d }~ww 	 W d    dS 1 sw   Y  d S )Nr   r   r   .lockr   soF)warning_allvec_isaz-c__lib_path__
PYTHONPATH:)stderrenvT)r   r   
vec_isa_okr   r  	_avx_coder   r   r   r1   rH   rI   r   shlexsplitcpp_compile_commandcompile_filer  r  rD   
executable_avx_py_loadrC   DEVNULLr2   	Exception)	rp   r   
input_pathr   r   r  output_path	build_cmder!   r!   r%   __bool__  s<   
	
$zVecISA.__bool__)r   r   r   r   r   r   r   r@   r%  r5  r;  r#  r   r&  r'  r(  r)  r   r   rB  r!   r!   r!   r%   r    s   
 r  c                   @   V   e Zd ZU dZdZdZejdejdej	diZ
defddZejZeegef ed	< d
S )	VecAVX512i   CPU_CAPABILITY_AVX512z0-mavx512f -mavx512dq -mavx512vl -mavx512bw -mfma       r   c                 C   r'   )Navx512r!   ro   r!   r!   r%   __str__A  r&   zVecAVX512.__str__r)  Nr   r   r   r  r  r   r@   r   bfloat16float16r!  r   rI  r  r)  r   r   r   r!   r!   r!   r%   rD  :     
 rD  c                   @   rC  )VecAVX2   CPU_CAPABILITY_AVX2z-mavx2 -mfma   rF  r   c                 C   r'   )Navx2r!   ro   r!   r!   r%   rI  N  r&   zVecAVX2.__str__r)  NrJ  r!   r!   r!   r%   rN  G  rM  rN  c                   @   sL   e Zd ZU dZdZdZi ZdefddZdd Z	e
jZee
gef ed< d	S )
InvalidVecISAr   r=   r   c                 C   r'   )NINVALID_VEC_ISAr!   ro   r!   r!   r%   rI  Z  r&   zInvalidVecISA.__str__c                 C   r'   r(   r!   ro   r!   r!   r%   rB  ]  r&   zInvalidVecISA.__bool__r)  N)r   r   r   r  r  r   r!  r   rI  rB  r  r)  r   r   r   r!   r!   r!   r%   rS  T  s   
 rS  c                  C   sn   t jdkrg S g } td }| }tD ]}t||v r#|r#| | q| W  d    S 1 s0w   Y  d S )Nr   z/proc/cpuinfo)rD   r   rt   readsupported_vec_isa_listr   append)isa_list	_cpu_info_cpu_info_contentisar!   r!   r%   valid_vec_isa_listj  s   


$r\  c                  C   sP   t  } | stS tjjd u r| sJ | d S | D ]}tjj| kr%|  S qtS )Nr   )r\  invalid_vec_isar   r   simdlenr#  )_valid_vec_isa_listr[  r!   r!   r%   pick_vec_isax  s   r`  Tc                 C      | rdS dS )Nz-shared -fPICr=   r!   )sharedr!   r!   r%   
get_shared  r   rc  c                 C   ra  )Nz-Wallr=   r!   )r-  r!   r!   r%   get_warning_all_flag  r   rd  c                   C   r'   )Nz-std=c++17 -Wno-unused-variabler!   r!   r!   r!   r%   	cpp_flags  r&   re  c                   C   r'   )Nz-DTORCH_INDUCTOR_CPP_WRAPPERr!   r!   r!   r!   r%   cpp_wrapper_flags  r&   rf  c                  C   sV   d} t  r| S tjdkr| d7 } nt dkr| d7 } n| d7 } t  s)| d7 } | S )Nz%-O3 -ffast-math -fno-finite-math-onlydarwinz -Xclangppc64lez -mcpu=nativez -march=nativez	 -fopenmp)r   r   rD   r   machine)
base_flagsr!   r!   r%   optimization_flags  s   


rk  c                   C   r'   )Nz$-D C10_USING_CUSTOM_GENERATED_MACROSr!   r!   r!   r!   r%   use_custom_generated_macros  r&   rl  c                  C   s    t  rt } d|  dS dS )Nz-Wp,-fopenmp z( -D C10_USE_GLOG -D C10_USE_MINIMAL_GLOGr=   )r   r   r   
openmp_lib)rm  r!   r!   r%   use_fb_internal_macros  s   rn  c                   C   s   t  rdS dS )Nz	-nostdincr=   )r   r   r!   r!   r!   r%   use_standard_sys_dir_headers  s   ro  c                  C   sD   zd} t |  d}tt|dkW S  t jy!   Y dS w )Nzconda list llvm-openmp --jsonr  r   F)r  r  r7  r   lenra   loadsr  )commandoutputr!   r!   r%   is_conda_llvm_openmp_installed  s   rt  c                  C   sT   zt ddg t g dd } tj| }|| fW S  t jy)   Y dS w )Nr  brew)ru  z--prefixlibompr  )Fr=   )r  r  r   stripr1   rH   r{   r  )libomp_pathomp_availabler!   r!   r%   homebrew_libomp  s   
rz  Fr.  c                 C   s  t  rdtjvrdtjvrtjt tjd< ddlm	} |r't  r'd}d}t
jdkr| s:|tks:|s:t jjr||tdg }||td	g }g }t  si|g d
7 }|dg7 }|sh|dg7 }n|dg7 }|rz|tjt g7 }| }|rt  r|tkrt| }	d| d|	 d|	 d|	 dg}nd| }|rt  r|dg7 }n|g d7 }n||tdg }g }t
jdkrt  }
tdd urtjtddd}tj|}|r
|tjtdd |tjtdd ntd |
p|}
|
rg ndg}|
s`tdd ur`t  }
|
r`tjtdd}|tjtdd || t! j"dkr`tjtj|dr`dg}|
st# \}
}|
r|tj|d |tj|d nt  rdgndg}t  r|t$  |t%  |t&  |t'  |t(  |t)  |t*  |t+  |t,  |d ddd  |D }dd!d  |D }dd"d  |D }||||fS )#N	CUDA_HOME	CUDA_PATHr   )cpp_extensionTr=   r   includeLIBDIR)c10r@   	torch_cpugomptorch_pythonomp z-D CPU_CAPABILITY=z-D CPU_CAPABILITY_z-D HAVE__CPU_DEFINITIONz-DrB   )c10_cudarB   
torch_cudarg  
OMP_PREFIXzomp.hlibz-environment variable `OMP_PREFIX` is invalid.CONDA_PREFIXx86_64zlibiomp5.dylibiomp5c                 S      g | ]}d | qS )z-Ir!   r   pr!   r!   r%   
<listcomp>^      z1get_include_and_linking_paths.<locals>.<listcomp>c                 S   r  )z-Lr!   r  r!   r!   r%   r  _  r  c                 S   r  )z-lr!   r  r!   r!   r%   r  `  r  )-r   r   r1   r2   rH   dirnamer   rB   torch.utilsr}  rD   r   r]  r   enable_kernel_profileinclude_paths	sysconfigr   library_pathsget_config_varcpp_prefix_pathr'  r   upperrI   r(  r  r   r{   rW  warningswarnrt  unameri  rz  sleefopenmpgcc_includelibgcclibgcc_archlibgcc_backwardglibclinux_kernelgcc_install_tools_include)include_pytorchr.  rB   aot_moder}  macrosipathslpathslibscapry  header_path	valid_envconda_lib_pathrx  r!   r!   r%   get_include_and_linking_paths  s   







	





r  c              	   C   sD  t ||||\}}	}
}t r9|r| }|}ntj| }tj|}tjt t	 g}d
dd |D }n| }|}d}tddd
g dt  d| dt| dt| dt  d| d|	 d|
 d| d| dt  dt  dt  dt  d| d S )	Nr  c                 S   r  )z-Br!   r  r!   r!   r%   r  z  r  z'cpp_compile_command.<locals>.<listcomp>r=   z[ \n]+z
            z
            -o z	
        )r  r   r   r1   rH   r   r  r   ld	glibc_librI   r  subr   rc  rd  re  rk  rl  rn  ro  rw  )inputrs  r-  rb  r  r.  rB   r  r  r  r  r  inp_nameout_namelinker_pathsr!   r!   r%   r8  d  sf   


r8  c                   @   s4   e Zd Ze ZeejZedd Zedd Z	dS )CudaKernelParamCachec                 C   s,   t |ddtjd\}}||d< || j|< d S )Nr   )r   r   
cubin_path)r   r   aot_inductor_output_pathrh   )clsr   paramsr   r?   rH   r!   r!   r%   r     s   
zCudaKernelParamCache.setc                 C   s   | j |d S r    )rh   r3   )r  r   r!   r!   r%   r3        zCudaKernelParamCache.getN)
r   r   r   dictrh   r   clearclassmethodr   r3   r!   r!   r!   r%   r    s    


r  c                   @   (   e Zd Ze ZeejZedd ZdS )AotCodeCachec                    sF  |rt nt }ttdd||jd}t|d|tjd\} jvrddl	m
} t }|tj|d td	}	|	V tj|d d
 }
tj|
stt||
||jd}tdd| zt| W n tjy~ } zt||j|d }~ww td|
 |
 j< W d    n1 sw   Y   fdd}|S )Nio)r.  rB   r  r   )r   r   r   r   r*  r   z.so)r  rs  r.  rB   r  zaot compilation command: %sr  z.aot_inductor dynamic library already exist: %sc                     s<   t jdks	J  j gdd tt jd D R S )Nr   c                 s   s    | ]}d V  qd S r    r!   )r   r  r!   r!   r%   	<genexpr>  s    z=AotCodeCache.compile.<locals>.wrapper_call.<locals>.<genexpr>r   )rp  graph_outputsrh   range)r#   r  graphr   r!   r%   wrapper_call  s   *z*AotCodeCache.compile.<locals>.wrapper_call)r]  r`  r   r8  r  r   r   r  rh   r   r   r   r1   rH   rI   r   splitextr{   r6  r7  r  debugr  r  CalledProcessErrorr   CppCompileErrorrs  )r  r  source_coderB   picked_vec_isacpp_commandr>  r   r   r  	output_socmdrA  r  r!   r  r%   compile  sT   

	zAotCodeCache.compileN)	r   r   r   r  rh   r   r  r  r  r!   r!   r!   r%   r    
    
r  c                  C   sR   t tjd } |  }| }t|d\}}W d    |S 1 s"w   Y  |S )Nzcodegen/cpp_prefix.hh)r
   __file__r|   rt   rU  r   )rH   r   r   r?   filenamer!   r!   r%   r    s   


r  c                  C   s.   t  } t rdtj|  dS d|  dS )Nz
#include "")r  r   r   r1   rH   r   )r  r!   r!   r%   
cpp_prefix  s   r  r   c              
   C   sp  t  r
tj| n| }z|t  r~t }tj|}tj|}tjtjj	j
d}t B}t|tj|| t| tj|| tj|d}	t||	 t|||}
tj|rct| t|
| W d    n1 ssw   Y  W d S W d S tj|tjd W d S  tjy } z"|jd}d|v pd|v }|rtjdkrd}||7 }t|||d }~ww )Nr~  )r2  rW   z'omp.h' file not foundrv  rg  a  

OpenMP support not found. Please try one of the following solutions:
(1) Set the `CXX` environment variable to a compiler other than Apple clang++/g++ that has builtin OpenMP support;
(2) install OpenMP via conda: `conda install llvm-openmp`;
(3) install libomp via brew: `brew install libomp`;
(4) manually setup OpenMP and set the `OMP_PREFIX` environment variable to point to a path with `include/omp.h` under it.)r   r   r1   rH   r   r  rI   r@   utilsr}  _TORCH_PATHr4   TemporaryDirectoryr  copycopytreer   r{   remover  r  STDOUTr  rs  r   rD   r   r   r  )r>  r?  r  
input_filer  header_nameoutput_nametorch_includes_pathtmp_dirdest_include_pathoutput_file_pathrA  rs  openmp_probleminstructionr!   r!   r%   r9    s<   


(	r9  c                   @   s4   e Zd Ze ZeejZedd Zedd Z	dS )CppCodeCachec              
   C   s   zt | W S  tyG } z5dt|v r*tjdr*t dat | W  Y d }~S dt|v rBt| dt	  dt	  d| d }~ww )Nr  z/usr/lib64/libgomp.so.1z(failed to map segment from shared objectz3.  The most common reason this may occur is if the zl folder is mounted with noexec (e.g., by default Docker mounts tmp file systems as noexec).  Please remount zi with exec enabled, or set another temporary directory with TORCHINDUCTOR_CACHE_DIR environment variable.)
r   LoadLibraryOSErrorr   r1   rH   r{   _libgompr4   r5   )rH   rA  r!   r!   r%   _load_library,  s"   
zCppCodeCache._load_libraryc                 C   s   t  }ttdd|d}t|d|d\}}|| jvrnddlm} t }|tj	
||d td	}|4 |d d
 d }	tj	|	sQtt||	|d}
t||	|
 | |	| j|< || j| _W d    n1 siw   Y  | j| S )Nr  r  )r.  r   r   r   r   r*  r   r+  r,  )r  rs  r.  )r`  r   r8  r   rh   r   r   r   r1   rH   rI   r   r{   r6  r7  r9  r  r   )r  r  r  r  r   r>  r   r   r  r?  r  r!   r!   r%   ru   ?  s(   

zCppCodeCache.loadN)
r   r   r   r  rh   r   r  r  r  ru   r!   r!   r!   r%   r  (  s    

r  c                   @   sv   e Zd ZU e Zeeejf e	d< e Z
eejZedddZedddZeddd	Zeed
dd Zd
S )r   rh   r=   c                 C   s   t |d|dS Nr>   r  )r   )r  r  r   r!   r!   r%   r   ]  r  zPyCodeCache.writer!   c                 C   s    t |d|d\}}| |||S r  )r   r   )r  r  r   linemapr   rH   r!   r!   r%   ru   a  s   zPyCodeCache.loadc                 C   s   || j vrnt|]}z
t| |d}W n ty0 } ztd| dt|j d| d }~ww t	t d| }||_
||_t||j|j |tj|j< | j || tt| | j|< W d    n1 siw   Y  | j | S )NexeczFailed to import 
z: r<   )rh   rt   r  rU  r=  RuntimeErrortyper   types
ModuleTyper  r   r  __dict__rD   modulesr   r   ziplinemaps)r  r   rH   r  r   r   rA  modr!   r!   r%   r   f  s&   


zPyCodeCache.load_by_key_pathNc                 C   sV   || j vrd S | j | \}}t||}|dkrd S ||d  }|s#d S dd }||S )Nr   r   c                 S   s"   d}t || }dd t|D S )Nz"File "(.+)", line (\d+), in (.+)\nc                 S   s"   g | ]\}}}|t ||d qS ))r  linerJ   )r   )r   r   lnr!   r!   r%   r    s    zPPyCodeCache.stack_frames_for_code.<locals>.parse_stack_trace.<locals>.<listcomp>)r  findallreversed)stack_traceregexmatchesr!   r!   r%   parse_stack_trace  s
   z<PyCodeCache.stack_frames_for_code.<locals>.parse_stack_trace)r  r   )r  rH   linenolinesnodesr  entryr  r!   r!   r%   stack_frames_for_code|  s   


z!PyCodeCache.stack_frames_for_coder=   )r=   r!   )r!   )r   r   r   r  rh   r   r   r  r  r   r  r   r  r  r   ru   r   r   r   r  r!   r!   r!   r%   r   X  s   
 
r   c                   @   r  )CppWrapperCodeCachec                 C   s  d| }t |}tj|st| d}tj|| d| }td| || jvrtd| ddl	m
}	 t }
|	tj|
|d td	}| tj|std
| t }t }t }t }tt |d\}}}}t }t }| d| d| d| d| d| }| d| d| d}| }tjjj|||g|g|g|g|gdd}td| n.td| tj||}|d usJ tj|}t|jtj sJ |j!| td| || j|< W d    n1 sw   Y  | j| S )Ninline_extension_r,  r<   zCpp wrapper code path %szCpp wrapper cache miss for %sr   r   r*  r   zCpp wrapper building %s)r.  rB   r  z                     z -ffast-mathT)rJ   build_directorycpp_sources	functionsextra_cflagsextra_ldflagsextra_include_pathsuse_pchzCpp wrapper done building %sz(Found target .so, cpp wrapper loading %szCpp wrapper done loading %s)"rP   r1   rH   r{   r8   rI   r  r  rh   r   r   r   r   re  rk  rc  rd  r  r`  rl  rf  r@   r  r}  load_inline	importlibutilspec_from_file_locationmodule_from_specr   loaderr	   Loaderexec_module)r  r  	func_namer   rB   rJ   rN   extfilepathr   r   r  
_cpp_flags
_opt_flags_shared_warning_all_flag_ipaths_lpaths_libs_macros_use_custom_generated_macros_cpp_wrapper_flagsr  r  r  r  specr!   r!   r%   ru     sf   




/zCppWrapperCodeCache.loadN)	r   r   r   r  rh   r   r  r  ru   r!   r!   r!   r%   r    r  r  c                   @   s   e Zd Zedd ZdS )TritonCodeCachec                 C   s   t |}t||S r    )r   ru   getattr)r  kernel_namer  r  r!   r!   r%   ru     s   

zTritonCodeCache.loadN)r   r   r   r  ru   r!   r!   r!   r%   r.    s    r.  c                 C   s&   t | t| |}|j|d d S )N)warm_cache_only_with_cc)r   "set_compiler_worker_current_devicer.  ru   
precompile)r0  r  ccrT   kernelr!   r!   r%   _worker_compile  s   
r6  c                 C   s   t | |}|  |S r    )r.  ru   r3  )r0  r  r5  r!   r!   r%   _load_kernel  s   r7  c                   @   s   e Zd Zdd Zdd ZdS )TritonFuturec                 C   s   || _ || _|| _d S r    )r0  r  future)rp   r0  r  r9  r!   r!   r%   rq     s   
zTritonFuture.__init__c                 C   sv   t  }t| dr| jS | j  t| j| j }| _t  | }|dkr3td| d| j  t| j | `| `| `|S )Nr5  2   z"Detected long compilation time of z seconds for kernel name )	r   hasattrr5  r9  resultr7  r0  r  r   )rp   t0r5  latencyr!   r!   r%   r<    s   



zTritonFuture.resultN)r   r   r   rq   r<  r!   r!   r!   r%   r8    s    r8  c                   @   s   e Zd Zdd Zeeddd Zeeddd Ze	dd	 Z
e	d
d Ze	dd Zdd Zdd Zdeeef fddZdS )AsyncCompilec                 C   r   r    r!   ro   r!   r!   r%   rq   
  r&   zAsyncCompile.__init__r   c                   C   s   t jdksJ tt jS Nr   )r   compile_threadsr   r!   r!   r!   r%   pool  s   
zAsyncCompile.poolc                     s^   t   tjdksJ t   fdd} td}ttj|| d}tj	j
d |jtjd |S )Nr   c                     s$    fdd} t | ddat  d S )Nc                      s*   	 t d  t krtt tj q)NTr   )r   r1   getppidkillr   signalSIGKILLr!   	orig_ppidr!   r%   run#  s
   z4AsyncCompile.process_pool.<locals>.init.<locals>.runT)targetdaemon)r   _watchdog_threadstart)rI  rG  r!   r%   init"  s   z'AsyncCompile.process_pool.<locals>.initfork)
mp_contextinitializer)exitpriority)r   _propertiesr   rA  r1   r   multiprocessingget_contextr   r  FinalizeshutdownrD   maxsize)rN  fork_contextrB  r!   rG  r%   process_pool  s   
zAsyncCompile.process_poolc                 C   sZ   t jdkrd S t  |  }t|dr|  ntt jD ]}|  q|  t	  d S )Nr   _start_queue_management_thread)
r   rA  r*   rZ  r;  r[  r  _adjust_process_count_start_executor_manager_threadr-   )r  rB  r?   r!   r!   r%   	warm_pool:  s   




zAsyncCompile.warm_poolc                 C   s   t jdkr| S |  |S r@  )r   rA  rB  submit)r  taskr!   r!   r%   r_  W  s   
zAsyncCompile.submitc                    sB   t jdkst|dkrtt|S dd  fdd|D D S )Nr   c                 S   s   g | ]}|  qS r!   )r<  )r   tr!   r!   r%   r  a  r  z$AsyncCompile.map.<locals>.<listcomp>c                    s   g | ]
}   |qS r!   )rB  r_  )r   xr  fnr!   r%   r  a  s    )r   rA  rp  r   map)r  rd  seqr!   rc  r%   re  ]  s   zAsyncCompile.mapc                 C   s`   t   tjdkr+tj \}}tj }|d | }|  t	||||}t
|||S t||S )Nr   
   )r*   r   rA  r@   rB   get_device_capabilityr\   rZ  r_  r6  r8  r7  )rp   r0  r  rF   rG   rT   r4  r9  r!   r!   r%   rR   c  s   



zAsyncCompile.tritonc                    s    fdd}|  |S )Nc                      s   t  jS r    )r  ru   r5  r!   r  r!   r%   r`  r  r   zAsyncCompile.cpp.<locals>.task)r_  )rp   r  r`  r!   ri  r%   r   q  s   
zAsyncCompile.cppscopec                 C   s   t dd | D }t|dtjdd}tjdkrA| D ]#\}}tjr.t|ts.|	| t|t
tfr@| ||< |d qt  d S )Nc                 S   s"   g | ]\}}t |ttfr|qS r!   )r   r   r8  )r   r   r   r!   r!   r%   r  y  s    z%AsyncCompile.wait.<locals>.<listcomp>zInductor Compilationr   )totaldescdisabledelayr   )rp  itemsr   r   disable_progressrA  verbose_progressr   r   set_postfix_strr   r8  r<  updater-   )rp   rj  num_kernelspbarr   r<  r!   r!   r%   waitw  s(   



zAsyncCompile.waitN)r   r   r   rq   r   r   r   rB  rZ  r  r^  r_  re  rR   r   r   r   r   rv  r!   r!   r!   r%   r?  	  s"    %


r?  r  )r=   r   )r=   r   r=   )T)r   N)r   dataclassesr   r6   r_   r  ra   loggingrT  r1   r   r   r  r6  r  rE  r  rD   r  r4   r   r  r  r   bisectr   concurrent.futuresr   r   r   ctypesr   r   r   r	   r
   r   r   r   typingr   r   r   r   r   r   r@   torch._inductorr   r   r   torch._inductor.utilsr   	torch.hubr   r   rH   abspathr  _HEREr  r  r   	triton.fbr   triton.fb.buildr   torch._inductor.fb.utilsr   r   r   r   r+   r)   r*   r-   	getLoggerr   r  r   r9   rP   rQ   r   r   r   r   r   r   r   r   r   r}   	dataclassr   r   r   r   r   r  r  r  rD  rN  rS  r]  rV  r\  r`  rc  rd  re  rf  rk  rl  rn  ro  rt  rz  r  r8  r  r  r  r  r9  r  r   r  r.  r6  r7  r8  r?  r^  r!   r!   r!   r%   <module>   s    
	
GM 




\



	

 
+@

-0?G 