
    ^i5                        d Z ddlZddlmZ ddlmZmZmZmZm	Z	m
Z
mZmZmZmZ ddlZddlZddlZddlZddlmZ ddlmZ ddlmZ dd	lmZmZ dd
lm Z m!Z! ddl"m#Z#m$Z$m%Z%m&Z& ddl'm(Z( ddl)m*Z*m+Z+ ddl,m-Z-  ej\                  d      Z/eee0ef      Z1g dZ2 G d de$      Z3edejh                  dejj                  de
ejh                  e0f   fd       Z6edejh                  ddde
ejh                  df   fd       Z6dejh                  de	ejj                     de
ejh                  e	e0   f   fdZ6de0dz  dejh                  dejj                  dejj                  de	ejj                     de	ejj                     de
ejh                  ejj                  ejj                  e	ejj                     e	ejj                     f   fdZ7dee   dee0ee   f   fd Z8d!e1dee0ee   f   fd"Z9d#e1d$e	e#   d%e	e   d&e	e   de
ee0ee   f   e	eeef      f   f
d'Z:d#e	e1   de;fd(Z<d)e;dejz                  fd*Z>dAd+Z?dd,d-e	e   d%e	e   d.e	e   d/e@d0eAd#e	e1   d1eAd2e;d3e	eA   d4e	e%   d$e	e#   d5e	e   de&fd6ZBd-e	e   d%e	e   d.e	e   d/e@d0eAd2e;d#e	e1   d$e	e#   d5e	e   de%fd7ZCd8e;dede%fd9ZDd:eEd;eAd<eEd=eeA   d>ee0   d?eAd$e	e#   de
e%ee
e%e0f      f   fd@ZFy)Bz)Copyright 2019-2025, XGBoost contributors    N)Sequence)
AnyCallableDictListOptionalTupleTypeVarUnioncastoverload)	dataframe   )
collective)
Categories)FeatureNamesFeatureTypes)concatimport_cupy)BoosterDataIterDMatrixQuantileDMatrix)
is_on_cuda)get_model_categoriespick_ref_categories)	_RefErrorz[xgboost.dask])labelweightbase_marginqidlabel_lower_boundlabel_upper_boundc                        e Zd ZdZ	 	 	 ddee   dee   deee	e
f      dee   deee      ddf fd	Zd
edee   fdZdefdZddZdedefdZ xZS )DaskPartitionIterz.A data iterator for the `DaskQuantileDMatrix`.Ndatafeature_namesfeature_typesfeature_weightskwargsreturnc           	      F   t         t        d       f}|| _        t        D ]7  }t	        | ||j                  |d              t        t        | |      |      r7J  || _        || _	        || _
        t        | j                  t               sJ d| _        t        | 5  d       y )Nr   T)release_data)r   type_datametasetattrget
isinstancegetattr_feature_names_feature_types_feature_weights_itersuper__init__)	selfr&   r'   r(   r)   r*   typesk	__class__s	           R/var/www/html/hubwallet-dev/venv/lib/python3.12/site-packages/xgboost/dask/data.pyr:   zDaskPartitionIter.__init__4   s     4:&
 	7AD!VZZ401gdA.666	7
 ,+ /$**h///
d+    attrc                 N    t        | |      t        | |      | j                     S y N)r4   r8   )r;   rA   s     r?   _getzDaskPartitionIter._getM   s(    4*4&tzz22r@   c                 4    | j                   | j                     S )z5Utility function for obtaining current batch of data.)r/   r8   r;   s    r?   r&   zDaskPartitionIter.dataR   s    zz$**%%r@   c                     d| _         y)zReset the iteratorr   N)r8   rF   s    r?   resetzDaskPartitionIter.resetV   s	    
r@   
input_datac           	      8   | j                   t        | j                        k(  ryt        D ci c]  }|| j	                  |       }} |d| j                         d| j                  | j                  | j                  d| | xj                   dz  c_         yc c}w )zYield next batch of dataFN)r&   groupr'   r(   r)      T )	r8   lenr/   r0   rD   r&   r5   r6   r7   )r;   rI   r=   r*   s       r?   nextzDaskPartitionIter.nextZ   s    ::TZZ(+/0a!TYYq\/00 	
---- 11	
 	
 	

a
 1s   B)NNNr+   N)__name__
__module____qualname____doc__r   r   r   r   r   r   r   r:   strrD   r&   rH   r   boolrO   __classcell__)r>   s   @r?   r%   r%   1   s    8
 15CG)-,3i,  -,  lJ&> ?@	,
 "#, 49%, 
,2 # 
&c &x D r@   r%   dfcolr+   c                      y rC   rM   rX   rY   s     r?   _add_columnr\   m   s    ORr@   c                      y rC   rM   r[   s     r?   r\   r\   q   s    KNr@   c                     || |fS d}|j                    d| }|| j                  v r%|dz  }|j                    d| }|| j                  v r% | j                  di ||i} | |fS )Nr   _rL   rM   )namecolumnsassign)rX   rY   trailsuids       r?   r\   r\   u   s     {3wFXXJax
 C

!
!F8$ 
 
	 c3Z	 Bs7Nr@   devicer!   ysample_weightr    c                    t        |      \  }t        |      \  }t        |      \  }t        |      \  }	| | dk(  rdnd}
t        j                  j                  d|
i      5  j	                         |   j                  d      j                  j                         j                  j                  |<   j                  |      j                  |      |   j                         }|j                  j                         j                  j                         }t!        |      }t#        ||d   d	z   gz         }j%                  |d
|      j	                         ddd       |   }|   }fd||	fD        \  }}||||	fD cg c]  }||	 }}j'                  |d	      j	                         ||||fS # 1 sw Y   axY wc c}w )zA function to prevent query group from being scattered to different
    workers. Please see the tutorial in the document for the implication for not having
    partition boundary based on query groups.

    Ncpup2ptaskszdataframe.shuffle.methodcategory)byrL   F)drop	divisionsc              3   `   K   | ]%  }|t        t        j                  |         nd  ' y wrC   )r   ddSeries).0rd   rX   s     r?   	<genexpr>z!no_group_split.<locals>.<genexpr>   s.      "BECORYY3 ="s   +.)axis)r\   daskconfigsetpersistastypecatas_knowncodessort_valuesgroupbycountindexcomputevaluestolistsortedtuple	set_indexro   )re   rX   r!   rf   rg   r    qid_uidy_uidw_uidbm_uidshufflecntdivrd   uidss    `             r?   no_group_splitr      s     b#&KBB"IBB.IBR-JB ~5egG	4g>	? ZZ\k((488AACGGMM7^^w^'jj!'*002ii!((//1SkC3r7Q;-'(\\  
 ')	 	" W+C
5	A"JOQW"M; $UE6:NCcoCNDN	A		&	&	(BsA}k113 . Os   *C<GGGGr*   c                       j                  d      }|J t        |      }t        |d         rddlm nddlm dt        dt        t        t        f   f fddt        dt        j                  ffd}t        |      D cg c]
  } ||       }}t        |      }|j                  j                  r S t        j!                  d	t#        j$                         |       t        |      }t        |      r&t'               }|j)                  |j                        }	nt+        j(                  |j                        }	|j,                  |	ddf   }t/        |d
      r|j,                  |	ddf   }n	||	ddf   } j1                  d|gi       t3        |j4                        D ]"  \  }}
|
 v sJ  j1                  |
||
   gi       $  S c c}w )z>Sort worker-local data by query ID for learning to rank tasks.r&   Nr   )	DataFrameir+   c           
          dt         t        t              dt         t           f fd}t        D ci c]  }| |j                  |d             }}|j                         D ci c]  \  }}|	|| }}}|S c c}w c c}}w )zDReturn a dictionary containing all the meta info and all partitions.rA   r+   c                     | |    S y rC   rM   )rA   r   s    r?   rD   z0sort_data_by_qid.<locals>.get_dict.<locals>._get   s    Awr@   N)r   r   r   listr0   r2   items)r   rD   r`   data_optr=   vr&   r*   s   `      r?   get_dictz"sort_data_by_qid.<locals>.get_dict   s    	xS	* 	x~ 	
 DHH4D$vzz$566HH!)!1CAQ]1CC ICs    A<'
B2Bc                 $     |       } |      S rC   rM   )r   r&   r   r   s     r?   map_fnz sort_data_by_qid.<locals>.map_fn   s    {r@   a  [r%d]: Sorting data with %d partitions for ranking. This is a costly operation and will increase the memory usage significantly. To avoid this warning, sort the data based on qid before passing it into XGBoost. Alternatively, you can use set the `allow_group_split` to False.iloc)r2   rN   r   cudfr   pandasintr   rU   r   pdranger   r!   is_monotonic_increasingLOGGERwarningcollget_rankr   argsortnpr   hasattrupdate	enumeratera   )r*   
data_partsn_partsr   r   
meta_partsdfqdfxcp
sorted_idxcr   r   s   `          @@r?   sort_data_by_qidr      s   F#J!!!*oG*Q- "$C DdO # ",,  &+7^4&)4J4

C
ww&&
NN	T 	" 
C#]ZZ(
ZZ(

((:q=
!CsFhhz1}%*a- 
MM6C5/"#++& %1F{{q3q6(m$% MS 5s   Glist_of_partsc                      t         t              sJ i dt        dt        ddf fd}t	               D ]"  \  }} ||d       t
        D ]  } |||        $ j                  dd      }|t        di S )	z8Convert list of dictionaries into a dictionary of lists.r   r`   r+   Nc                 j    ||    v r	|    |   }nd }||vrg |<   |   j                  |       y y rC   )append)r   r`   partr   results      r?   r   z!_get_worker_parts.<locals>.append  sR    =## #D)DD6!!t4L% r@   r&   r!   rM   )r3   r   r   rU   r   r0   r2   r   )r   r   r   r_   r=   r!   r   s   `     @r?   _get_worker_partsr      s    mT***#%F&# &S &T & -( 1q& 	A1aL	
 **UD
!C
!+F+Mr@   partsmodelr(   xy_catsc                 j    t        |       }|d   d   }t        |||      \  }}t        |||      }||fS )Nr&   r   )r   r   r   )r   r   r(   r   unzipped_dictXr_   
model_catss           r?   _extract_datar     sH     &e,Mfa A(E=AMAz$Q
G<J*$$r@   c                    | t        | d   j                  d            }nd}t        t        j                  t        j                  |gt
        j                        t        j                  j                        d         }|S )Nr   r&   F)dtype)
r   r2   rV   r   	allreducer   arrayint32OpMAX)r   is_cudas     r?   _get_is_cudar   '  s]    U1X\\&124>>"((G9BHH"Etww{{STUVWGNr@   r   c                 n    | rt               }|j                  d      }|S t        j                  d      }|S )N)r   r   )r   emptyr   )r   r   r   s      r?   _make_emptyr   1  s6    ]  L  Lr@   c                  l    t        j                         } t        j                  d| j                         y )NzWorker %s has an empty DMatrix.)distributed
get_workerr   r   address)workers    r?   _warn_emptyr   :  s#    ##%F
NN4fnnEr@   )refr'   r)   missingnthreadmax_binenable_categoricalmax_quantile_batchesr   Xy_catsc           
          t        |      }|%t                t        t        |      | |||	||      S t	        ||
||      \  }}t        t        di ||| |d||||	||      S )N)r'   r(   r   r   r   r   )r(   r'   r)   )r   r   r   r   r   r   rM   )r   r   r   r   r   r%   )r'   r(   r)   r   r   r   r   r   r   r   r   r   r   r   r   s                  r?   _create_quantile_dmatrixr   ?  s     5!G} ''1!5
 	
 !.eUM7 SM: 	
	
$'+		
 -1 r@   c        	         H   t        |      }	|"t                t        t        |	      | ||      S t	        d      }
dt
        t        |
      dt        |
   fd}t        ||||      \  }}i }|j                         D ]  \  }} ||      }|||<    t        di ||| ||||dS )zdGet data that local to worker from DaskDMatrix.

    Returns
    -------
    A DMatrix object.

    )r'   r(   r   Tr&   r+   c                 >    t        d | D              ry t        |       S )Nc              3   $   K   | ]  }|d u  
 y wrC   rM   )rt   r   s     r?   ru   z:_create_dmatrix.<locals>.concat_or_none.<locals>.<genexpr>  s     -tt|-s   )anyr   )r&   s    r?   concat_or_nonez'_create_dmatrix.<locals>.concat_or_none  s    ---d|r@   )r   r'   r(   r   r   r)   rM   )	r   r   r   r   r
   r   r   r   r   )r'   r(   r)   r   r   r   r   r   r   r   r   r   r   r   concated_dictkeyvaluer   s                     r?   _create_dmatrixr   m  s    & 5!G} ''1	
 	
 	AXhqk2 x{ 
 !.eUM7 SM:$&M#))+ 
U5!c  
# -' r@   is_quantilec                 2    | rt        di |S t        di |S )NrM   )r   r   )r   r*   s     r?   _dmatrix_from_list_of_partsr     s!    '1&11$V$$r@   	train_reftrain_idrefsevals_id
evals_name	n_threadsc          
         t        di | ||d d}g }|j                         }	t        |      D ]  \  }
}||
   |k(  r|j                  |||
   f       %|j	                  dd       -|d   |k7  rt        t              |d= t        di ||||	|d}nt        di |||	|d}|j                  |||
   f        ||fS )N)r   r   r   r   )r   r   r   r   )r   r   r   rM   )r   get_categoriesr   r   r2   
ValueErrorr   )r   r   r   r   r   r   r   Xyevalsr   r   r   eval_xys                r?   _get_dmatricesr     s    
% 


&eT
B
 (*E!GD/ /3A;("LL"jm,-775$+5zX% ++E
1 (b'G 2 ('G 	gz!}-.#/$ u9r@   rP   )GrT   loggingcollections.abcr   typingr   r   r   r   r   r	   r
   r   r   r   rw   r   numpyr   r   r   r   rr    r   r   _data_utilsr   _typingr   r   compatr   r   corer   r   r   r   r&   r   sklearnr   r   trainingr   	getLoggerr   rU   
_DataPartsr0   r%   r   rs   r\   r   r   r   r   rV   r   ndarrayr   r   floatr   r   r   r   dictr   rM   r@   r?   <module>r     sj   /  $         ! $ 0 ( > >  ?  			+	,$sCx.!
9 9x 
 RBLL Rryy RU2<<;L5M R 
 R 
 NBLL Nt NbllD6H0I N 
 N
#BII.
2<<#&' 02$J02
02 
02 
yy	02
 BII&02 "))$02 LL"))RYY(;Xbii=PP02fEtCy ET#tCy.-A EPZ Dd3i4H 4
%
%G
% L)
% j!	
%
 4T#Y%j0H*I!JJK
%,   "** F  "+L)+ L)+ c]	+
 + + J+ + + #3-+ 
'	+ G+ j!+ +\3L)3 L)3 c]	3
 3 3 3 J3 G3 j!3 3l%T %S %W %$$$ $ sm	$
 $ $ G$ 7Dw|,--.$r@   