
    ^i\1                        d Z ddlmZmZmZmZmZmZ ddlZ	ddl
ZddlmZ ddlmZ ddlmZmZ ddlmZ ddlmZ ddlZddlmZ dd	lmZ dd
l m!Z! ddl"mZ# ddl$m%Z% ddlm&Z& ddl'm(Z( ddl)m*Z* ddl)m+Z, ddl-m.Z. ddl/m0Z0m1Z1 de2ded   deddfdZ3de2ded   deddfdZ4de2ded   deddfdZ5dede2ded   de6ddf
dZ7de6de6de2d e8d!e9ddfd"Z:dede6defd#Z;dedee2   fd$Z<ded%e6d&e6d'e6d(e6de2deejz                  ej|                  ej|                  f   fd)Z?dede2ddfd*Z@d+e	j                  d,ded%e6d&e6d-e6d.e9d/e	j                  j                  deejz                  ej|                  f   fd0Z+dede0ddfd1ZCy)2z0Tests for dask shared by different test modules.    )AnyListLiteralTupleTypecastN)array)	dataframe)Client
get_worker)parse)make_classification)concat)get_basescore   )dask)EvalsLog)_get_rabit_args)_DASK_VERSION   )make_batches)make_categorical)make_recoded)Deviceassert_allclosetree_methoddevice)cpucudaclientreturnc                    t        ddd      \  }}t        j                  dd| |      }|j                  ||       t	        |      }t        j                  |      j                  d      }t        j                  |      j                  d	      }t        j                  dd| |      }	||	_
        |	j                  ||       t	        |	      }
t        j                  j                  ||
       y
)z%Test init estimation for classsifier.          	n_samples
n_featuresrandom_stater   n_estimators	max_depthr   r   r$   Nchunksr$   N)r   xgbXGBClassifierfitr   da
from_arrayrechunkdxgbDaskXGBClassifierr    nptestingr   )r   r   r    Xyclf
base_scoredxdydclfdbase_scores              U/var/www/html/hubwallet-dev/venv/lib/python3.12/site-packages/xgboost/testing/dask.pycheck_init_estimation_clfrD      s     btTDAq


!VC GGAqMs#J	q		!	!	!	4B	q		!	!	!	/B!!	D DKHHR%KJJz;7    c                    ddl m}  |ddd      \  }}t        j                  dd| |      }|j	                  ||       t        |      }t        j                  |      j                  d	
      }t        j                  |      j                  d
      }	t        j                  dd| |      }
||
_        |
j	                  ||	       t        |
      }t        j                  j                  ||       y)z#Test init estimation for regressor.r   )make_regressionr#   r$   r%   r&   r   r*   r-   r.   r0   N)sklearn.datasetsrG   r1   XGBRegressorr3   r   r4   r5   r6   r7   DaskXGBRegressorr    r9   r:   r   )r   r   r    rG   r;   r<   regr>   r?   r@   dregrB   s               rC   check_init_estimation_regrM   6   s     1 X"4PDAq


!VC GGAqMs#J	q		!	!	!	4B	q		!	!	!	/B  !VD DKHHR%KJJz;7rE   c                 8    t        | ||       t        | ||       y)zTest init estimation.N)rM   rD   )r   r   r    s      rC   check_init_estimationrO   O   s     k66:k66:rE   	n_workersc           	         |dk\  sJ | j                         5  t        j                  ||      }t        j                  t        d      t        ddd      d      }t        j                  g dgdz  dgdz        }t        j                  |j                  d	d
ddf<   | j                  |       |j                  t        j                  ||      t        j                  ||             d	d	d	       y	# 1 sw Y   y	xY w)z0Issue #9271, not every worker has missing value.r   )r   r   i'  r   )abi  r   Ni  i  rS   )rP   npartitions)
as_currentr7   r8   pd	DataFramerangeSeriesr9   nanlocwait_for_workersr3   ddfrom_pandas)r    r   r   rP   r=   r;   r<   s          rC   check_uneven_nanra   W   s     >>				 
$$VLLLuU|%q"2EFGII0d
0aS4Z01!#jtDj#o)4NN1)4NN1)4	

 
 
s   CC66C?	worker_id	comm_argsis_qdmc           
          d}d}d}|dk7  }t               j                  j                  }	t        j                  j
                  dddi|5  t        j                  t        |||||       ddi}
|rt        j                  |
|		      }nt        j                  |
|		      }i }t        j                  d
|	|d||dfgd|       t        j                  t        t        t           |d   d               sJ 	 ddd       g g g }}}t!        |      D ]I  }t        |||||      \  }}}|j#                  |       |j#                  |       |j#                  |       K t%        |      }t%        |      }t%        |      }|rt        j&                  ||||	      }nt        j                  ||||	      }i }t        j                  d
|	|d||dfgd|       t(        j*                  j-                  d   d   |d   d   d       y# 1 sw Y   xY w)z-Basic checks for distributed external memory.r$         r   dmlc_communicatorrabit)use_cupyr)   cache)nthreadhist)r   rl   r   Train)evalsnum_boost_roundevals_resultrmseN)weightrl   g-C6?rtol )r   statenthreadsr1   
collectiveCommunicatorContexttmIteratorForTestr   ExtMemQuantileDMatrixDMatrixtrainnon_increasingr   r   floatrZ   extendr   QuantileDMatrixr9   r:   r   )rb   rP   r   rc   rd   n_samples_per_batchr(   	n_batchesrj   	n_threadsitXyresultslxlylwixr<   wr;   yconcatwconcatresults_locals                           rC   check_external_memoryr   l   s$    JIH""++I		+	+	Sg	S	S N#!&	
 	
 !77INBR3B		"yFK=/ 	
   d5k773CF3K!LMMM/N2 RBB9 

1a 			!
		!
		!
 	r
ARjGRjG  GGYO[[GGYG MII9G
G}o" JJ -"8"@t  mN Ns   B*HHc                 0    | j                  t        | |      S )z6Get RABIT collective communicator arguments for tests.)syncr   )r    rP   s     rC   get_rabit_argsr      s    ;;	::rE   c                     t               t        d      k\  rddini } | j                  di |d   }t        |j	                               S )zGet workers from a dask client.z2025.4.0rP   rR   workersrv   )r   parse_versionscheduler_infolistkeys)r    kwargsr   s      rC   get_client_workersr      sH    "//]:5N"Nk2TVF#f##-f-i8GrE   r'   r(   n_query_groupsmax_relc                   t        |       }|t        |      z  }|dk(  rddlm nddlm dt
        dt
        dt        j                  ffd}g }	d}
t        d||      D ]=  }| j                  |||||
t        |      z     g      }|	j                  |       |
d	z  }
? ||t        |      z  z
  }|dk7  r1| j                  |||t        |      z  
      }|	j                  |        |d	d      }t        j                  |	|      }t        |t        j                        sJ |j                  ddgd	      |j                  |j                  fS )z'Synthetic dataset for learning to rank.r   r   )rY   nseedr!   c                     t         j                  j                  |      }t        | 

d	      \  }}|j	                  | fd      } |t        
      D cg c]  }d| 	 c}      }||d<   ||d<   |S c c}w )Nr   )n_informativen_redundant	n_classes)sizelowhighf)columnsqidr<   )r9   randomdefault_rngr   integersrZ   )r   r   rngr;   r<   r   r   dfDFr   r(   r   s           rC   makezmake_ltr.<locals>.make   s    ii##D)"zg
1 ll!.lAU:->?asG?@5	3	 @s   A9
)r   r   r   r   )r   r   metar   r<   )axis)r   lenpandasrY   cudfintrX   rZ   submitappendr_   from_delayed
isinstancedropr<   r   )r    r'   r(   r   r   r   r   n_samples_per_workerr   futuresr   kfutlastr   r   r   s     ```           @rC   make_ltrr      sT    !(G$G4*(	 	3 	2<< 	 	 G	A1i!56 mm(q71s7|CS;T:U  
 	s	Q ,s7|;<DqymmDD/Cc'l/RmSs1:D	t	,Bb",,'''77E3<a7("$$66rE   c                    t        | dddd|      \  }}}t        | dddd|      \  }}}t        j                  dd|d	
      }|j                  |||||f||fg||gd       |j                  dk(  sJ |j
                  d   |j                  k(  sJ |j                         d   d   }	t        j                  |	dd d      sJ |	       t        j                  j                  |	d   dd       y)z)Test for the allow_group_split parameter.i      rf      )r   r   r   i   F$   zrank:pairwise)allow_group_splitr+   r   	objectiveT)r   eval_seteval_qidverboser   validation_0zndcg@32Nrg   g{Gz?)	tolerancerR   g      ?rt   )r   r7   DaskXGBRankerr3   n_features_in_shaperq   r{   non_decreasingr9   r:   r   )
r    r   X_trq_try_trX_vaq_vay_valtrndcgs
             rC   check_no_group_splitr      s   c!QvD$  c!QvD$ 

!	C GG,t-   $$$::a=C.....n-i8DT#2Y$7==7JJtBx48rE   F)onehot	cat_dtypen_categoriesr   r   c          
      "   t        |       }t        |      }g }dt        dt        j                  fd}	 |	d||d|      }
t        |      D ]S  \  }}t        ||z  ||||z  z  z
        }|dk(  r|dk(  rd}| j                  |	||||d|g      }|j                  |       U t        t        j                  t        j                  ||
	            }|d
   }||j                  j                  d
g         }|rt        j                  |      |fS ||fS )z&Synthesize categorical data with dask.r   r!   c                  ,    t        di | \  }}||d<   |S )Nlabelrv   )make_cat_local)r   r;   r<   s      rC   packzmake_categorical.<locals>.pack  s!    ''1'
rE   r   F)r'   r(   r   r   r   r   )r'   r(   r   r   r   r   r   r   )r   r   r   r_   rY   	enumerateminr   r   r   r   r   
differenceget_dummies)r    r'   r(   r   r   r   r   rP   dfsr   r   r   workerl_n_samplesfuturer   r<   r;   s                     rC   r   r     s8    !(GGI
Cs r|| 
 !D w' 	6"IY)5K0L$L
 >a1fK!!%H  
 	

6!$ BLL"//#D*IJB
7A
2::  '+,A~~a !##a4KrE   c                     dt         t        j                     dt        dt        dt        j                  fddt         t        j                     ddf fd}t        j                  t        j                  fD ]
  } ||        y)z+Run re-coding test with the Dask interface.DMatrixTargsr   r!   c                     | t         j                  u r|j                  dd       } | |d|i|S |j                  dd         | |i |S )Nref)r7   DaskQuantileDMatrixpop)r   r   r   r   s       rC   create_dmatrixz"run_recode.<locals>.create_dmatrixK  sU     t///**UD)CT5s5f55

5$(((rE   Nc                 t   t        d      \  }}}}}t              }t        j                  |d      j	                  |      t        j                  |d      j	                  |      t        j                  ||j                  d   dz  f      j	                  |      }}} | ||d	      }	 | ||d|	
      }
t        j                  di|	|
dfg      } | ||d	      }	 | ||d|	
      }
t        j                  di|	|
dfg|d         } | ||d	      }	 | ||d|	
      }
t        j                  di|	|
dfg|d         }t        j                  j                  |d   d   d   |d   d   d          t        j                  ||      j                         }t        j                  ||      j                         }t        ||       t        j                  ||	      j                         }t        j                  ||
      j                         }t        ||       y )N`   )r(      rU   )r   r   r.   T)enable_categorical)r   r   r   Valid)ro   booster)ro   	xgb_modelhistoryrr   )r   r   r_   r`   persistr4   r5   r   r7   r   r9   r:   r   inplace_predictcomputepredict)r   encreencr<   _todencdreencr@   r   Xy_validr   	results_1	results_2predt_0predt_1r    r   r   s                   rC   runzrun_recode.<locals>.runU  s]   *6bAUAq!' NN3A.66r6BNN5a0888DMM!QWWQZ1_$67???K f HfdB4P!ffbTr
 **Xv&Hg3F2G

 HfdB4P!ffbTr
 JJvg&'i(
	 HffbTR!fdB4R
 JJvg&'i(
	 	

""i )&19Y3G3PQW3X	
 &&vw=EEG&&vw?GGI1,,vw3;;=,,vw9AAC1rE   )r   r7   DaskDMatrixr   r   )r    r   r  r   r   s   ``  @rC   
run_recoder  H  s    )t''()14)@C)			)62d4++, 62 62p %%t'?'?@ HrE   )D__doc__typingr   r   r   r   r   r   numpyr9   r   rX   r   r	   r4   r
   r_   distributedr   r   packaging.versionr   r   rH   r   xgboostr1   xgboost.testingr:   r{   xgboost.compatr   xgboost.testing.updaterr    r7   _typingr   r   
dask.utilsr   datar   r   r   ordinalr   utilsr   r   strrD   rM   rO   r   ra   dictboolr   r   r   rY   r[   r   r   int64	DTypeLiker  rv   rE   rC   <module>r%     sv   6 8 8      * 4 0   ! 1   " &  4 ! *88%m48>D8	8288%m48>D8	82;;%m4;>D;	;

!$
.5m.D
QT
	
*FFF F 	F
 F 
FR;6 ;c ;c ;
 v  $s)  .7.7.7 .7
 .7 .7 .7 2<<BII-..7b9 9 9 9J %'XX333 3 	3 3 yy""3 2<<"#3nFv Fv F$ FrE   