o
    hf                     @   sv  d dl Z d dlZd dlZd dlZd dlmZ d dlmZ d dlZ	d dl
Z
d dlmZmZ d dlmZ d dlmZmZ d dlmZ d dlmZmZ d d	lmZmZmZmZmZmZmZm Z  e rfd d
lm!Z" nd dlm"Z" dd Z#dd Z$dd Z%dd Z&dd Z'dd Z(dd Z)dd Z*dd Z+dd Z,d d! Z-d"d# Z.d$d% Z/d&d' Z0d(d) Z1d*d+ Z2d,d- Z3e4d.kre3  dS dS )/    N)deepcopy)Path)
DataLoaderDataset)Accelerator)SeedableRandomSamplerprepare_data_loader)AcceleratorState)RegressionDatasetare_the_same_tensors)DistributedTypegatheris_bf16_availableis_ipex_availableis_npu_availableis_xpu_availableset_seedsynchronize_rng_states)RegressionModel4XPU)RegressionModelc                 C      t d| j  d S )NzPrinting from the main process printprocess_indexstate r   _/var/www/html/ai/venv/lib/python3.10/site-packages/accelerate/test_utils/scripts/test_script.py
print_main3      r   c                 C   r   )Nz%Printing from the local main process )r   local_process_indexr   r   r   r   print_local_main7   r   r!   c                 C   r   )NPrinting from the last process r   r   r   r   r   
print_last;   r   r#   c                 C   s   t d| d| j  d S )NPrinting from process : r   )r   process_idxr   r   r   print_on?   s   r'   c               	   C   s  t  } | j}td}|  E | jr2td t|d}|d W d    n1 s,w   Y  nt|d}|d W d    n1 sGw   Y  W d    n1 sVw   Y  | 	  | jrt|d}d
| }W d    n1 syw   Y  z2|dsJ d|d	kr|dsJ d|d| jd	 ksJ d
|dd	  d| j W n ty   |   w | jr| r|  | 	  t }t| | t| j W d    n1 sw   Y  |  }| jr|dksJ | dn|  dksJ | d|d |d t| | t| j W d    n	1 s7w   Y  | jrL|  dksKJ n|  dksWJ |d |d t| |  t!| j W d    n	1 szw   Y  | j"r|  d| jjd	  ksJ n|  dksJ |d |d t#|D ]R}t| | j$t%|d| j| W d    n	1 sw   Y  | j&|kr|  d| d| j& ksJ n|  dksJ |d |d qd S )Nzcheck_main_process_first.txt皙?za+zCurrently in the main process
zNow on another process
r zMain process was not first   zOnly wrote to file zNow on another processz times, not z Printing from the main process 0z$ != Printing from the main process 0z != ""r   z&Printing from the local main process 0r"   )r   r$   r%   )'r   num_processesr   main_process_firstis_main_processtimesleepopenwritewait_for_everyonejoin	readlines
startswithendswithcountAssertionErrorunlinkexistsioStringIO
contextlibredirect_stdouton_main_processr   r   getvaluerstriptruncateseekon_local_main_processr!   is_local_main_processon_last_processr#   is_last_processrange
on_processr'   r   )acceleratorr,   pathftextresultr&   r   r   r   process_execution_checkC   s   

 



&

&
rP   c                  C   s$   t  } | jdkrtd t|  d S )Nr   zTesting, testing. 1, 2, 3.)r	   r    r   r   r   r   r   init_state_check   s   
rQ   c                  C   s   t  } tdg tt sJ d| jtjkr)tdg ttj s(J dn| jtj	kr?tdg ttj
 s?J dt }tdg|d t| sTJ d	| jd
kr_td d S d S )Ntorchz*RNG states improperly synchronized on CPU.cudaz*RNG states improperly synchronized on GPU.xpuz*RNG states improperly synchronized on XPU.	generator)rU   z0RNG states improperly synchronized in generator.r   zAll rng are properly synched.)r	   r   r   rR   get_rng_statedistributed_typer   	MULTI_GPUrS   	MULTI_XPUrT   	Generator	get_stater    r   )r   rU   r   r   r   rng_sync_check   s   



r\   c                  C   s"  t  } d| j }tt|dd}t|| j| j| jdd}g }|D ]	}|t| q t	
|}t| j|t| t	| t	d| sJJ dtt|dd}t|| j| j| jddd}g }|D ]	}|t| qct	
|}t	| t	d| sJ d| jdkrtd	 tt|ddd
}t|| j| j| jdd}g }|D ]	}|t| qt	
| }|  |tt|ksJ dtt|ddd
}t|| j| j| jddd}g }|D ]	}|t| qt	
| }|  |tt|ksJ d| jdkrtd d S d S )N       
batch_sizeT)put_on_devicer   %Wrong non-shuffled dataloader result.)ra   split_batchesz Non-shuffled dataloader passing.r`   shuffle!Wrong shuffled dataloader result.zShuffled dataloader passing.)r	   r,   r   rI   r   devicer   appendr   rR   catr   typeequalcpuarangelongtolistsortlistr    r   lengthdlrO   batchr   r   r   dl_preparation_check   sh   

$
$
rv   c               	   C   s  t  } d| j }tt|dd}t|| j| j| jddd}g }|D ]	}|t| q!t	
|}t	| t	d| sBJ dtt|dd}t|| j| j| jdddd}g }|D ]	}|t| q\t	
|}t	| t	d| s}J d| jdkrtd	 tt|ddd
}t|| j| j| jddd}g }|D ]	}|t| qt	
| }|  |tt|ksJ dtt|ddd
}t|| j| j| jdddd}g }|D ]	}|t| qt	
| }|  |tt|ksJ d| jdkr	td d S d S )Nr]   r^   r_   T)ra   dispatch_batchesr   rb   )ra   rc   rw   z(Non-shuffled central dataloader passing.rd   rf   z$Shuffled central dataloader passing.)r	   r,   r   rI   r   rg   r   rh   r   rR   ri   rk   rl   rm   rn   r   ro   rp   rq   r    rr   r   r   r   central_dl_preparation_check   sr   

$	
$
	rx   c                  C   s   t  } G dd dt}G dd d}|td| j }|t|dd}t||d}t|| j| j| j}t	|j
d	rFt|j
j
|sDJ d
d S t|j
|sPJ d
d S )Nc                   @   s$   e Zd Zdd Zdd Zdd ZdS )z+custom_sampler_check.<locals>.CustomDatasetc                 S   s
   || _ d S Ndata)selfr{   r   r   r   __init__(     
z4custom_sampler_check.<locals>.CustomDataset.__init__c                 S   s
   t | jS ry   )lenr{   r|   r   r   r   __len__+  r~   z3custom_sampler_check.<locals>.CustomDataset.__len__c                 S   s
   | j | S ry   rz   )r|   indexr   r   r   __getitem__.  r~   z7custom_sampler_check.<locals>.CustomDataset.__getitem__N)__name__
__module____qualname__r}   r   r   r   r   r   r   CustomDataset'  s    r   c                   @   s4   e Zd ZddededefddZdd Zd	d
 ZdS )z0custom_sampler_check.<locals>.CustomBatchSamplerTdataset_lengthr`   re   c                 S   s   || _ t|| _|| _d S ry   )r`   nprm   
data_indexre   )r|   r   r`   re   r   r   r   r}   2  s   
z9custom_sampler_check.<locals>.CustomBatchSampler.__init__c                 s   s@    t | }| jrtj| j}n| j}t||}|E d H  d S ry   )r   re   r   randompermutationr   array_split)r|   num_batchesr   outputr   r   r   __iter__7  s   z9custom_sampler_check.<locals>.CustomBatchSampler.__iter__c                 S   s   t t| j| j S ry   )mathceilr   r   r`   r   r   r   r   r   @  s   z8custom_sampler_check.<locals>.CustomBatchSampler.__len__N)T)r   r   r   intboolr}   r   r   r   r   r   r   CustomBatchSampler1  s    	r   r]   r^   r_   )batch_samplerr   z>Custom sampler was changed after calling `prepare_data_loader`)r	   r   rI   r,   r   r   r   rg   r   hasattrr   
isinstance)r   r   r   datasetsamplerrt   r   r   r   custom_sampler_check$  s$   
r   c                 C   s   t d |d t| dd}t||t|d}t|||d}t }tjj	|
 dd}tdD ]#}|D ]}	|  ||	d }
tjj|
|	d	 }|  |  q4q0||fS )
N*   )rs   seed)rU   data_sourcenum_samples)r`   r   r(   lr   xy)r   manual_seedr
   r   r   r   r   rR   optimSGD
parametersrI   	zero_gradnn
functionalmse_lossbackwardstep)rs   r`   rU   	train_setr   train_dlmodel	optimizerepochru   r   lossr   r   r   mock_trainingR  s(   

r   c                  C   s  t  } t }d}|d | j }t||| j |\}}t|js$J dt|js-J dt }t	||d|d}t
 }tjj| dd}	||||	\}}}	td |d td	D ]$}
|D ]}|  ||d
 }tjj||d }|| |	  q`q\|| }t|j|jsJ dt|j|jsJ d|d tdd}t	||| j d|d}t
 }tjj| dd}	||||	\}}}	td |d td	D ]$}|D ]}|  ||d
 }tjj||d }|| |	  qq|| }t|j|jsJ dt|j|jsJ d|d tj s.t rtd t   tdd}t	||d|d}t
 }tjj| dd}	||||	\}}}	td |d td	D ]&}|D ] }|  ||d
 }tjj||d }|| |	  qkqg|| }t|j|jsJ dt|j|jsJ dtj rtd t   tdd}tj dd}||}|j|dd}t!ddgj"tj#|j$d}||}t% rltd t   tdd}t	||d|d}t
 }tjj| dd}	||||	\}}}	td |d td	D ]&}|D ] }|  ||d
 }tjj||d }|| |	  q(q$|| }t|j|js_J dt|j|jslJ dt& rtd t   tddd}t	||d|d}t
 }tjj| dd}	||||	\}}}	td |d td	D ]&}|D ] }|  ||d
 }tjj||d }|| |	  qq|| }t|j|jsJ dt|j|jsJ dt' rxtd t   tddd}t	||d|d}t
 }tjj| dd}	||||	\}}}	td |d td	D ]&}|D ] }|  ||d
 }tjj||d }|| |	  q4q0|| }t|j|jskJ dt|j|jszJ dd S d S )Nr^      z0Did not obtain the same model on both processes.T)r`   re   rU   r(   r   r   r   r   r   z=Did not obtain the same model on CPU or distributed training.zVTraining yielded the same results on one CPU or distributed setup with no batch split.)rc   zSTraining yielded the same results on one CPU or distributes setup with batch split.zFP16 training check.fp16)mixed_precisionzKeep fp32 wrapper check.   )keep_fp32_wrapperr+   )dtyperg   zBF16 training check.bf16zipex BF16 training check.)r   rl   zxpu BF16 training check.Fz=Did not obtain the same model on XPU or distributed training.)(r	   rR   rZ   r,   r   r   abr   r   r   r   r   r   preparer   r   rI   r   r   r   r   r   r   unwrap_modelrl   allcloser   rS   is_availabler   _reset_stateLinearTensortofloat16rg   r   r   r   )r   rU   r`   rs   r   	old_modelrK   r   r   r   r   ru   r   r   _model_with_fp32_wrapperinput_tensorr   r   r   training_checkk  s  




















r   c                  C   s   t  } ttdd| j }| |}t|dks&J d| j dt| W d    n1 s0w   Y  ttdd| j d }| j|dd(}| jrit	t|| j }t||ksiJ d	| j dt| W d    n1 ssw   Y  | 
  d S )
Nr   r   z4Each process did not have two items. Process index: z
; Length: r   r+   T)apply_paddingz;Last process did not get the extra item(s). Process index: )r	   rq   rI   r,   split_between_processesr   r   rH   r   r   r3   )r   r{   resultsnum_samples_per_devicer   r   r   !test_split_between_processes_list  s"   r   c                  C   s  t  } g d}g d}tg d}| jdv r>|||d}t|}| |}| jdkr@|d |d d d| j  ks?J n5| jdkrT|d |d d	d  ksSJ n!| jd
kru|d |d dd  ksuJ d|d d  d|d  | jdkr|d |d d d| j  ksJ n'| jdkr|d |d d	d  ksJ n| jd
kr|d |d dd  ksJ | jdkrt|d |d d d| j  sJ d|d d d| j   d|d  nO| jdkrt|d |d d	d  sJ d|d d	d   d|d  n'| jd
kr.t|d |d dd  s.J d|d dd   d|d  W d    n	1 s9w   Y  |   d S )N)r+   r   r   r            r^   )r   r   cderM   gh)r+   r   r   )r   r   r   r   r   r^   r   r   r   z
Expected: z
, Actual: r   r   z7Did not obtain expected values on process 0, expected `z`, received: z7Did not obtain expected values on process 2, expected `z7Did not obtain expected values on process 4, expected `)	r	   rR   tensorr,   r   r   r   r   r3   )r   r   r   r   r{   	data_copyr   r   r   r   (test_split_between_processes_nested_dict%  sR   
$

8
$


$r   c                  C   s   t  } | jdkrTtg dg dg| j}| |0}| jdkr4t|tg d| js3J nt|tg d| jsEJ W d    n1 sOw   Y  | 	  d S )Nr+   )r   r+   r   r   )r   r   r   r   r   )
r	   r,   rR   r   r   rg   r   r   r   r3   )r   r{   r   r   r   r   #test_split_between_processes_tensorK  s   

$"r   c                  C   sH   t  } |  du sJ | jr|   |  du sJ |  du s"J d S )NFT)r   check_triggerr.   set_trigger)rK   r   r   r   test_triggerW  s   r   c                  C   s@  t  } | j}|jdkrtd t  |  |jtjkr"t	j
 }n|j}||jkrZ|jdkr3td t  |jdkr?td t  |jdkrKtd t  |jdkrWtd t  |jdkrctd t  |jdkrotd t  |jtjkr{t  t  |jtjkrd S |jdkrtd	 t  |jdkrtd
 t  d S )Nr   z**Initialization**z
**Test process execution**z+
**Test split between processes as a list**z+
**Test split between processes as a dict**z-
**Test split between processes as a tensor**z1
**Test random number generator synchronization**z 
**DataLoader integration test**z
**Training integration test**z
**Breakpoint trigger test**)r   r   r    r   rQ   r3   rW   r   rX   rR   rS   device_countr,   r   rP   r   r   r   r\   rv   TPUrx   r   	DEEPSPEEDr   r   )rK   r   num_processes_per_noder   r   r   mainh  sN   










r   __main__)5r>   r<   r   r/   copyr   pathlibr   numpyr   rR   torch.utils.datar   r   
accelerater   accelerate.data_loaderr   r   accelerate.stater	   accelerate.test_utilsr
   r   accelerate.utilsr   r   r   r   r   r   r   r   r   r   r   r!   r#   r'   rP   rQ   r\   rv   rx   r   r   r   r   r   r   r   r   r   r   r   r   r   <module>   sL   (I=A. (&7
