o
    hwR                     @   s  d dl Z d dlZd dlZd dlZd dlZd dlmZ d dlmZ d dl	Z	d dl
mZ d dlZd dlZd dlmZ dZdhddZd	d
 Zdd Zi Zdejjejjfed< deejjfed< ddd ejjfed< ejjejjfed< ejjejjfed< ejjejjfed< eejjfed< eejjfed< dd dd fed< dd dd fed< ejjdd fed< edd fed < d!d d"d fed#< d$d d%d fed&< ejjd'd fed(< ejjd)d fed*< ejjd+d fed,< ed-d fed.< ed/d fed0< d1d d2d fed3< d4d d5d fed6< i Zd7d8ged< d7d8ged< d7d8ged< d7ged< d7ged< d9ged< d7d8ged:< d;ged< d<d=ged< d<d=ged>< d?d@ged(< d?d@ged,< d?d@ged*< dAged#< d<ged < dBged3< dCged&< dDged6< d?ged.< d?ged0< dEgZg dFZej ej!ej"gZ#g dGZ$e%eeee#e$Z&dHdI e&D Z'e	j(j)dJe&e'dKdLdM Z*dEgZg dNZej ej!gZ#e%eeee#Z&dOdI e&D Z'e	j(j)dPe&e'dKdQdR Z+dEgZg dNZej ej!ej"gZ#g dSZ$e%eeee#e$Z&dTdI e&D Z'e	j(j)dJe&e'dKdUdV Z,dEgZg dNZej gZ#dWdXgZ-e%eeee#e-Z&dYdI e&D Z'e	j(j)dZe&e'dKd[d\ Z.d]gZd]gZej ej!gZ#g d^Z$e%eeee#e$Z&d_dI e&D Z'e	j(j)dJe&e'dKd`da Z/dbgZej!gZ#dcgZ0dgZ$e%eee#e$e0Z&dddI e&D Z'e	j(j)dee&e'dKdfdg Z1dS )i    N)product)join)Lion   MbP?c                 C   sZ   t j| |||d}|dk  }||kr+td| d|  t jj| |||d d S d S )N)rtolatolr   z"Too many values not close: assert z < )torchisclosesumitemprinttestingassert_close)abr   r   max_error_countidxerror_count r   F/var/www/html/ai/venv/lib/python3.10/site-packages/tests/test_optim.pyassert_most_approx_close   s   r   c                  C   s$   dt t  } tj| dd | S )Nz/tmp/autoswap/T)exist_ok)struuiduuid4osmakedirspathr   r   r   get_temp_dir   s   r    c                 C   s   t |  d S )N)shutilrmtreer   r   r   r   rm_path#   s   r#   adam_pytorchlion_pytorchc                 C      t j| ddS N{Gz??r	   optimSGDpxxr   r   r   <lambda>+       r/   momentum_pytorchadampaged_adamw
paged_adamlion
paged_lionc                 C   r&   r'   r*   r-   r   r   r   r/   4   r0   c                 C      t jj| ddddS Nr(   r)   F
block_wise)bnbr+   r,   r-   r   r   r   r/   5       momentumc                 C   r&   r'   r	   r+   RMSpropr-   r   r   r   r/   8   r0   c                 C   r7   r8   )r;   r+   r?   r-   r   r   r   r/   9   r<   rmspropc                 C      t jj| ddS NFr9   r;   r+   Adam8bitr-   r   r   r   r/   ;   r0   adam8bitc                 C   rA   rB   r;   r+   Lion8bitr-   r   r   r   r/   <   r0   lion8bitc                 C   r&   r'   r*   r-   r   r   r   r/   >   r0   c                 C   r7   r8   r;   r+   SGD8bitr-   r   r   r   r/   ?   r<   momentum8bitc                 C   r&   r'   r>   r-   r   r   r   r/   B   r0   c                 C   r7   r8   r;   r+   RMSprop8bitr-   r   r   r   r/   C   r<   rmsprop8bitc                 C   rA   NTr9   rC   r-   r   r   r   r/   F   r0   adam8bit_blockwisec                 C   rA   rO   )r;   r+   PagedAdamW8bitr-   r   r   r   r/   G   r0   paged_adamw8bit_blockwisec                 C   rA   rO   )r;   r+   PagedAdam8bitr-   r   r   r   r/   H   r0   paged_adam8bit_blockwisec                 C   rA   rO   rF   r-   r   r   r   r/   I   r0   lion8bit_blockwisec                 C   rA   rO   )r;   r+   PagedLion8bitr-   r   r   r   r/   J   r0   paged_lion8bit_blockwisec                 C   r&   r'   r*   r-   r   r   r   r/   L   r0   c                 C   r7   Nr(   r)   Tr9   rI   r-   r   r   r   r/   M   r<   momentum8bit_blockwisec                 C   r&   r'   r>   r-   r   r   r   r/   P   r0   c                 C   r7   rX   rL   r-   r   r   r   r/   Q   r<   rmsprop8bit_blockwise)exp_avgstate1)
exp_avg_sqstate2)momentum_bufferr\   lamb)
square_avgr\   )r[   r\   qmap1max1)r]   r^   qmap2max2lamb8bit)r[   r\   rb   absmax1)r]   r^   rd   absmax2)r_   r\   rb   rc   )r_   r\   rb   rg   )ra   r\   rb   rc   )ra   r\   rb   rg      )    ri        )r2   r=   r@   r3   r4   r5   r6   c                 C      g | ]}d j | qS z!dim1_{}_dim2_{}_gtype_{}_optim_{}format.0valsr   r   r   
<listcomp>o       rt   zdim1, dim2, gtype, optim_name)idsc              	   C   sh  |t jkr|dv rt  | dkr|dkrd S t j| |d|dd }| }| }t| d |g}t| d |g}|t jkrFd\}}	n|t jkrPd\}}	nd	\}}	t	t
D ]}
t j| |d|dd
 }|  |_| |_|  |  t| D ]\}}t jj|j| | |j| |  ||	d q}t|| ||	dd |
t
d  dkr|
dkrt }t | t|d ~d }t| d |g}|t t|d t| t|| ||	dd t| D ]\}}t|j| | |j| | ||	dd q|t jkr |j|j |_||j t j||j| |dv r1|j| d dks1J qXd S )N)r=   r@   rl   cudadevicedtype皙?r   gư>h㈵>)r   r(   -C6?r   r(   r   r   
   r      opt.pt)r   r   r   )larsr`   	unorm_vecg        )r	   bfloat16pytestskiprandnclonefloatstr2optimizersfloat32rangekgradstepstr2statenamesr   r   staterw   r   r    save
state_dictr   load_state_dictloadr#   datatorz   copy_)dim1dim2gtype
optim_namep1p2torch_optimizerbnb_optimizerr   r   igname1name2r   r   r   r   test_optimizer32bitp   sb   




	
r   )rj   ri   rk   c                 C   rm   )zdim1_{}_dim2_{}_gtype_{}ro   rq   r   r   r   rt      ru   zdim1, dim2, gtypec                 C   s  | dkr
|dkr
d S t j| |d|dd }t j| |d|dd }t j| |d|dd }t |dk }d}d}d}	d}
tjj   tjj |d	d
 tjj 	|||g |
 }|
 }|
 }tj|||g|	||f|
}|t jkr}d\}}nd\}}tdD ]P}t j| |d|dd d }t j| |d|dd d }t j| |d|dd d }||_||_||_|  |j| d jt jksJ |j| d jt jksJ qd S )Nrl   cpurx   r{   r)   +?r   :0yE>
optim_bits   r|   r~   2   rw   r\   r^   )r	   r   	rand_liker;   r+   GlobalOptimManagerget_instance
initializeoverride_configregister_parametersrw   Adamr   r   r   r   r   rz   uint8)r   r   r   r   r   p3maskbeta1beta2lrepsadam2r   r   r   g1g2g3r   r   r   test_global_config   sF   

r   )rE   rH   rK   rN   rP   rU   rY   rZ   c                 C   rm   rn   ro   rq   r   r   r   rt          
c              	   C   s  |t jkr|dvrt  | dkr|dkrd S t j| |d|dd }| }| }d}t| d |g}t| d |g}|t jkrLd\}	}
d	\}}n|t jkrZd\}	}
d
\}}nd\}	}
d	\}}g }g }t	dD ]}t j| |d|dd }|  |_
| |_
|  |  t|| ||dd g }t| D ]Q\}}}}d|v rtj|j| | |j| | |j| | |d}ntj|j| | |j| | |j| | d}t j|j| | ||	|
ddk}||  qt || }|t |d  }|jt jkr| dk sJ | dk sJ n| dk s"J | dk s+J ||   ||   |d dkr|dkrtt| |D ]\\}}}}}| }|j| |  }|j| |  }t }t | t|d ~d }t| d |g}|t t|d t| t j !||j| |  t j !||j| |  d|v rtj|j| | |j| | |j| | |d}ntj|j| | |j| | |j| | d}t j !|| t j|j| | ||	|
ddk}|"  dk sJ qPt|| ||dd |j#$| |_#|%|j# t j !|$|| tt| |D ]\\}}}}}|j| | %|j# q3qjd S )N)rP   rU   rl   rw   rx   r{      r   )g~jth?r   )r}   r   )r   r(   d   r(   r   r   	blockwise)codeabsmaxA	blocksize)r   r   r   r   g&.>ga2U0*#?g-C6Z?giUMu?ga2U0*S?r   r   r   )&r	   r   r   r   r   r   r   r   r   r   r   r   r   r   Fdequantize_blockwiser   
dequantizer
   appendabsrz   meanr   zipr    r   r   r   r   r   r#   r   r   r   r   r   r   )r   r   r   r   r   r   r   r   r   r   r   patolprtolerrors	relerrorsr   r   dequant_statesr   r   qmapmax_vals1num_not_closeerrrelerrss1cpyraws1cpyrb   r   r   r   r   test_optimizer8bit   s   





 r   rj   r   c                 C   rm   )z&dim1_{}_dim2_{}_gtype_{}_optim_bits_{}ro   rq   r   r   r   rt     s    zdim1, dim2, gtype, optim_bitsc              	   C   s  | dkr
|dkr
d S t j| |d|dd }d}d}d}d}| }| }	tjj|g|||f||d	}
tjj|	g|||f||d
d}t d }d}tdD ]}|d7 }t j| |d|dd d|  }| }||	_	t
|||d
\}}}| | |}||_	|
  |  |dkrt j||	 t jj|
j| d |j|	 d ddd t jj|
j| d |j|	 d ddd nU|dkrt jj||	ddd t jj|
j| d |j|	 d ddd t jj|
j| d |j|	 d ddd |
j| d |j|	 d  |
j| d |j|	 d  |d dkrJ|dkrJt }t | t|d ~d }tjj|	g|||f||d
d}|t t|d qOd S )Nrl   r   rx   r{   r)   r   r   r   )r   r   )r   percentile_clippingr   r   r   rw   r(   rj   r\   g-C6
?r   r   r^   r      r   r   )r	   r   rw   r   r;   r+   r   zerosr   r   r   r   r   r   r   r   r   r   r   r    r   r   r   r   r   )r   r   r   r   r   r   r   r   r   r   adam1r   	gnorm_vecr   r   r   r   current_gnormclip_valgnorm_scaler   r   r   r   test_adam_percentile_clipping  s   	

r   i   )rP   rT   rR   rW   c                 C   rm   rn   ro   rq   r   r   r   rt     r   c                 C   s   | dkr
|dkr
d S t j| |d|dd }t| d |g}t j| |d|dd }||_ttD ]}|td krAt j  t }|	  q0t j  t | }	t
d ttd  |  | }
t
|||	|
  d S )Nrl   rw   rx   r{   r(   r    )r	   r   r   r   r   r   rw   synchronizetimer   r   )r   r   r   r   r   r   r   r   t0r   paramsr   r   r   test_benchmark_blockwise  s    


r   r   r;   c                 C   rm   )z%dim1_{0}_gtype_{1}_optim_{2}_mode_{3}ro   rq   r   r   r   rt     ru   zdim1, gtype, optim_name, modec                    sR  t jjt j fddtdD  }||}| }d }|dkr-t| d | }nt| d | }t j	t
dfdd	}t j  td
 d
}t j|d dd	|}t jdd|dfd }	t|D ]/}
t|
 ||
 }|
dkrt j  t }||}t jj||	|
  }|  |  qit j  t|t |  d S )Nc                    s   g | ]	}t j  qS r   )r	   nnLinear)rr   r   r   r   r   rt     s    z/test_stream_optimizer_bench.<locals>.<listcomp>r   r	   r   rl   g   ЈArw   )ry   r      )sizer   )r	   r   
Sequential
ModuleListr   r   rw   r   
parametersemptyintr   r   sleepr   randintr   
functionalcross_entropyr   backwardr   )r   r   r   modelayers1large_tensorr+   num_batchesbatcheslblsr   r   r   out1loss1r   r   r   test_stream_optimizer_bench  s2   &





r  )r   r   r   )2ctypesr   r!   r   r   	itertoolsr   os.pathr   r   r%   r   r	   bitsandbytesr;   bitsandbytes.functionalr   r   r   r   r    r#   r   r+   r   AdamW
PagedAdamW	PagedAdam	PagedLionr   r   r   r   float16r   r   optimizer_nameslistvaluesnamesmarkparametrizer   r   r   r   r   r   r   r  r   r   r   r   <module>   s    












E
-

}
W
