o
    h                     @   s   d dl Z d dlZd dlmZ d dlmZ d dlmZ ee	Z
dZdZdZdZ				
		ddedededededefddZdd Zdd ZdejfddZdS )    N)closing)
get_loggerzAddress already in usezSocket Timeoutz_tcp_store/num_membersz_tcp_store/last_member   X  T   	is_serverserver_addrserver_port
world_sizetimeoutwait_for_workersc              
   C   s  |dkr|dkrt d| d| |dkrtd| |dkr"|nd}	 |dkr,|}nt }td|||| | ztj|||| tj|d|d	}	|rPt|	| td
 |	W S  t	y }
 z*t
|
tkr||k rvtd||| |d7 }nt	d| d| d|
 W Y d }
~
nd }
~
ww q%)Nr   r   zCserver_port must be specified when world_size > 1, got server_port=z, world_size=z+sever_port: %s, specified, ignoring retriesTzVCreating c10d store on %s:%s
  world_size  : %s
  is_server   : %s
  timeout(sec): %s
)seconds)	host_nameportr   	is_masterr   r   zSuccessfully created c10d storez)port: %s already in use, attempt: [%s/%s]zon z, port: z already in use)
ValueErrorloginfoget_free_portdistTCPStoredatetime	timedelta_check_full_rankRuntimeErrorstr_ADDRESS_IN_USEwarning)r   r	   r
   r   r   r   retriesattemptr   storee r#   a/var/www/html/ai/venv/lib/python3.10/site-packages/torch/distributed/elastic/utils/distributed.pycreate_c10d_store   sZ   	

	


r%   c              
   C   sj   |  td}||kr| td z| t W d S  ty4 } zt|tkr/td| d| d }~ww )Nr   z<val_ignored>ztimed out waiting for all z members to join)	add_MEMBER_CHECKINset_LAST_MEMBER_CHECKINgetr   r   _SOCKET_TIMEOUTTimeoutError)r!   r   idxr"   r#   r#   r$   r   \   s   
r   c                  C   s>   t  } t|  |  d W  d    S 1 sw   Y  d S )Nr   )get_socket_with_portr   getsockname)sockr#   r#   r$   r   l   s   

$r   returnc                  C   s   t jddt jt jd} | D ]<}|\}}}}}t  |||}z|d |d |W   S  tyI } z|  tj	d|d W Y d}~qd}~ww t
d)	a  
    Returns a free port on localhost that is "reserved" by binding a temporary
    socket on it. Close the socket before passing the port to the entity
    that requires it. Usage example

    ::

    sock = _get_socket_with_port()
    with closing(sock):
        port = sock.getsockname()[1]
        sock.close()
        # there is still a race-condition that some other process
        # may grab this port before func() runs
        func(port)
    	localhostN)hostr   familytype)r2   r   r   zSocket creation attempt failed.)exc_infozFailed to create a socket)socketgetaddrinfo	AF_UNSPECSOCK_STREAMbindlistenOSErrorcloser   r   r   )addrsaddrr4   r5   proto_sr"   r#   r#   r$   r.   r   s    


r.   )r   r   r   Tr   )r   r7   
contextlibr   torch.distributeddistributedr   'torch.distributed.elastic.utils.loggingr   __name__r   r   r+   r'   r)   boolr   intfloatr%   r   r   r.   r#   r#   r#   r$   <module>   s>   
C