
    ^i                        d Z ddlZddlZddlZddlZddlZddlmZ ddlm	Z	 ddl
mZmZmZmZmZmZmZmZmZmZmZmZmZ ddlmZ ddlZddlZddlm
Z ddlmZ dd	l m!Z! d
dl"m#Z#m$Z$m%Z% d
dl&m'Z'm(Z( d
dl)m*Z*m+Z+ d
dl,m-Z. erddl/m0Z1 neZ1 ejd                  d      Z3e3ji                  dd      Z5de6de6deeeejn                  ejn                  f   ee8e8f   f   ddf   fdZ9defdZ:defdZ;deddfdZ<e5jz                  deejn                  ejn                  f   fd       Z>e5jz                  deejn                  ejn                  f   fd       Z?e5jz                  deejn                  ejn                  f   fd       Z@e5jz                  deejn                  ejn                  f   fd       ZAe5jz                  dee1ejn                  f   fd       ZBe5jz                  d eCdee!j                  ejn                  ejn                  e!j                  ejn                  ejn                  e!j                  ejn                  ejn                  f	   fd!       ZE	 dVd"d#d$d%e6de6d&e6d'eFd(eFd)e6deeejn                     eejn                     eejn                     f   fd*ZGee!j                  ej                  ej                     ej                  ej                     f   ZJe	 G d+ d,             ZK G d- d.e      ZL G d/ d0      ZMd1ej                  ej                     deej                  ej                  ej                  f   fd2ZN	 dWd3e!j                  d4ej                  ej                     d5ej                  ej                     d6eOdej                  ej                     f
d7ZQd8ee!j                  ej                  ej                     ej                  ej                     f   d9ej                  ej                     deKfd:ZRd;eLdeeKeeK   f   fd<ZSd3e!j                  d4ej                  ej                     d5ej                  ej                     d=ej                  ej                     d>ej                  ej                     dee!j                  ej                  ej                     ej                  ej                     ej                  ej                     f   fd?ZUd@edAee$   dBeCddfdCZVe5jz                  de6de6dDeOdEeFdeee!j                     ejn                  f   f
dF       ZWdGe6dHe6deeC   fdIZXdJdKd"d#ej                  dLdMde6de6dNe6dOeFdDeOdPeOdQeFd)e6dRej                  j                  dBeCdee*ejn                  f   fdSZZ G dT dUe#      Z[y)XzUtilities for data generation.    N)ThreadPoolExecutor)	dataclass)TYPE_CHECKINGAnyCallableDict	GeneratorList
NamedTupleOptionalSequenceSetTupleTypeUnion)request)typing)r	   )sparse   )DataIterDMatrixQuantileDMatrix)is_pd_cat_dtypepandas_pyarrow_mapper)	ArrayLike	XGBRanker)train)	DataFramejoblibz
./cachedir)verbose	n_samples
n_featuresreturnc              #     K   t        j                  d      }t        j                  j	                  d      }|j                  dd| |z        j                  | |      }t        j                  t        j                  t        j                  t        j                  t        j                  t        j                  t        j                  t        j                  t        j                  t        j                   t        j"                  t        j$                  t        j&                  t        j(                  t        j*                  t        j,                  t        j.                  t        j0                  t        j2                  t        j4                  g}|D ]A  }t        j6                  ||      }||f |j9                         |j9                         f C |D ]A  }t        j6                  ||      }|j;                  |      }|j;                  |      }	||	f C |j=                  dd| |z  	      j                  | |      }t        j>                  t@        fD ]  }
t        j6                  ||
      }||f ! t        j>                  t@        fD ]A  }t        j6                  ||      }|j;                  |      }|j;                  |      }	||	f C y
w)z*Enumerate all supported dtypes from numpy.pandas  r      lowhighsizedtype   g      ?r+   N)!pytestimportorskipnprandomRandomStaterandintreshapeint32int64byteshortintcint_longlonguint32uint64ubyteushortuintcuint	ulonglongfloat16float32float64halfsingledoublearraytolistr   binomialbool_bool)r!   r"   pdrngorigdtypesr-   Xdf_origdfdtype1dtype2s               U/var/www/html/hubwallet-dev/venv/lib/python3.12/site-packages/xgboost/testing/data.py	np_dtypesrZ   /   s     
		X	&B
))


%C;;13Y-C;DLL:D 	






		
		

		













		
		)F,  (HHT'AgkkmQXXZ''(
  HHT',,t$\\!_rk	 <<3Y%;<<DD:D 88T" HHT(Ag 88T" HHT(,,t$\\!_rk	s   KKc            	   #   8  K   t        j                  d      } | j                         | j                         | j	                         | j                         | j                         | j                         | j                         | j                         g}t        j                  }| j                  dd|dgdd|dgdt        j                        }t        j                  d| j                  fD ]-  }|D ]&  }| j                  dd|dgdd|dgd|      }||f ( / t        j                  }| j                         | j!                         g}| j                  d	d
|dgdd
|d	gdt        j                        }t        j                  d| j                  fD ]m  }|D ]f  }| j                  d	d
|dgdd
|d	gd|      }||f |d   }|d   }t#        || j$                        sJ t#        || j$                        sJ ||f h o |j'                  d      }|j(                  D ]'  }||   j*                  j-                  t.              ||<   ) t        j                  d| j                  fD ]4  }| j                  dd|dgdd|dgd| j1                               }||f 6 d| j                  fD ]i  }dd|dgdd|dgd}	| j                  |	|t        j2                  n| j5                               }| j                  |	| j5                               }||f k yw)z/Enumerate all supported pandas extension types.r%   r.   r         f0f1r,   N      ?g       @g      @r_   categoryTF)r0   r1   
UInt8DtypeUInt16DtypeUInt32DtypeUInt64Dtype	Int8Dtype
Int16Dtype
Int32Dtype
Int64Dtyper2   nanr   rF   NAFloat32DtypeFloat64Dtype
isinstanceSeriesastypecolumnscatrename_categoriesintCategoricalDtyperN   BooleanDtype)
rP   rS   NullrR   r-   rV   ser_origsercdatas
             rY   	pd_dtypesr}   j   s*    			X	&B 	






	F %'FFD<<1dAq!T1o6bjj  D ruu%  	E1dAq!T1o>e  B (N		 66Doo!23F<<S$$S#tS,AB"**  D ruu% 
  		 ES$,S#tS4IJRW  B (NDzHT(Cc299---h		222C-		 
  ;;z"D\\ 5q'++//4Q5ruu% \\q$?1aq/:%%'  
 Bh ruu UD$/tT47PQ||DDLbooFW|X\\$boo&7\8Bhs   LLc            	   #     K   t        j                  d      } t        j                  d      }t        }d| j                  dfD ]  }|D ]  }|j	                  d      s|j	                  d      r&| j                  |      s|dk(  r|nt        j                  }| j                  dd|d	gd
d	|dgdt        j                        }| j                  dd|d	gd
d	|dgd|      }||f   | j                  dfD ]o  }| j                  dd|dgdd|dgd| j                               }| j                  dd|dgdd|dgd| j                  |j                                     }||f q yw)z*Pandas DataFrame with pyarrow backed type.r%   pyarrowNr   rE   rO   r.   r   r\   r]   r^   r,   FT)r0   r1   r   rl   
startswithisnar2   rk   r   rF   rw   
ArrowDtyperN   )rP   parS   rx   r-   	orig_nullrR   rV   s           rY   pd_arrow_dtypesr      s    			X	&B			Y	'B #F. ruua   	E	*e.>.>v.F$&GGDMdaiRVVI<<1i+Aq)Q3GHjj   D
 1dAq!T1o>e  B (N	"  	||%t,UD$4MN//#  
 \\%t,UD$4MN--
+  
 Bh	s   E(E*rQ   c                    | j                  d      j                  dd      }| j                  d      }t        j                  |d<   t	        j
                  t        d      5  t        ||       ddd       t	        j
                  t        d      5  t        ||       ddd       y# 1 sw Y   :xY w# 1 sw Y   yxY w)	zValidate there's no inf in X.    r/      r]   )   r   zInput data contains `inf`matchN)	r3   r6   r2   infr0   raises
ValueErrorr   r   )rQ   rT   ys      rY   	check_infr      s    


##Aq)A


AffAdG	z)D	E 1 
z)D	E 1   s   #B)B5)B25B>c                     dt         j                  j                  d      t        j                  d      } dt
        t           dt
        t           dt
        t           dt         j                  ffd}d	t        d
t        dt         j                  ffd}| j                   |ddgddgddg       |ddgddgddg       |dd       |dd       |dd       |dd        |d!d"       |d#d$       |d%d&      d'	      }||j                  j                  d(g         j                         }|d(   j                         }||fS ))zSynthesize a dataset similar to the sklearn California housing dataset.

    The real one can be obtained via:

    .. code-block::

        import sklearn.datasets

        X, y = sklearn.datasets.fetch_california_housing(return_X_y=True)

    iP  i  r%   meanssigmasweightsr#   c                     j                  t        |d   z        | d   |d         }j                  |j                  d   z
  | d   |d         }t        j                  ||gd      S )Nr   )r+   locscaler.   axis)normalru   shaper2   concatenate)r   r   r   l0l1r!   rQ   s        rY   mixture_2compz-get_california_housing.<locals>.mixture_2comp  sv     ZZi'!*,-E!HF1I  
 ZZi"((1+5E!HFSTIZV~~r2hQ//    meanstdc                 .    j                  | |f      S )Nr   r   r+   )r   )r   r   r!   rQ   s     rY   normz$get_california_housing.<locals>.norm  s    zzd#YLzAAr   g5ŀ]g~(Fv^gr-|E?g3mE^1?gDi-T?gÅv-W?gXcB@g&	@@g6?g](?g8W nx?gd?g|["@g2{e?)r   r   gVb<@g>+)@gZK@g@g)P=?g˧^T?g/E@g@gI@gtbO$@gg9h @gk}v?)		LongitudeLatitudeMedIncHouseAgeAveRooms	AveBedrms
PopulationAveOccupMedHouseValr   )r2   r3   default_rngr0   r1   r
   floatndarrayr   rr   
differenceto_numpy)rP   r   r   rV   rT   r   r!   rQ   s         @@rY   get_california_housingr      sv    I
))


%C			X	&B0E{0$(K0:>u+0	0B5 Bu B B 
&}-#%78Z(
 &k*#%78Z(
  28JK"4:LM"39JK#4:MN$6<MN"4:LM%6<NO#	

B* 	2::  -12;;=A
=""$Aa4Kr   c                  |    t        j                  d      } | j                         }|j                  |j                  fS )z&Fetch the digits dataset from sklearn.sklearn.datasets)r0   r1   load_digitsr|   target)datasetsr|   s     rY   
get_digitsr   *  s6     ""#56H!D99dkk!!r   c                  P    t        j                  d      } | j                  d      S )z-Fetch the breast cancer dataset from sklearn.r   T)
return_X_y)r0   r1   load_breast_cancer)r   s    rY   
get_cancerr   2  s)     ""#56H&&$&77r   c                     t        j                  d      } t        j                  j	                  d      }d}d}| j                  ||      \  }}|j                  d||j                        }t        |j                  d         D ]<  }t        |j                  d         D ]  }|||f   st        j                  |||f<   ! > ||fS )zGenerate a sparse dataset.r      i  g      ?)random_stater.   r   )
r0   r1   r2   r3   r4   make_regressionrM   r   rangerk   )	r   rQ   nsparsityrT   r   flagijs	            rY   
get_sparser   9  s     ""#56H
))


$CAH##AC#8DAq<<8QWW-D1771: !qwwqz" 	!AAqDz&&!Q$	!! a4Kr   c                     t         rddlnt        j                  d      t        j
                  j                  d      dj                         } dt        t        t        t        f   t        f   dt        dj                  ffd	} |d
dddddd      | d<    |ddddd      | d<    |dddddd      | d<    |ddd d!d"d#d$d%d      | d&<    |d'd(d)d!d*d+      | d,<    |d-d(d.d/d0d"d1d2d3d      | d4<    |d5d6d7d8d9d:d;      | d<<    |d=d>d?d@d$dAd      | dB<    |dCdDdd"dEd      | dF<    |d@dGdGdHdI      | dJ<   dKt        dLt        dt        dj                  ffdM} |dNdOd      | dP<    |dQdRd      | dS<    |dTdUd      | dV<    |dWdXd      | dY<    |dZd[d      | d\<    |d]d^d      | d_<    |d`dad      | db<    |dcddd      | de<    |dfdgd      | dh<    |didjd      | dk<   t        | j                        }j                  |       | |   } t	        j                   fl      }| j                  D ]q  }t#        | |   j$                  j&                        r:|| |   j(                  j*                  j-                  t        j.                        z  }`|| |   j0                  z  }s |dm|j3                         z  z  }|dn|j5                         z
  z  }| |fS )oam  Get a synthetic version of the amse housing dataset.

    The real one can be obtained via:

    .. code-block::

        from sklearn import datasets

        datasets.fetch_openml(data_id=42165, as_frame=True, return_X_y=True)

    Number of samples: 1460
    Number of features: 20
    Number of categorical features: 10
    Number of numerical features: 10
    r   Nr%   r&   i  
name_probadensityr#   c           	         t        	d|z
  z        }t        j                  d|z
        dkD  xr |dkD  }|rd|z
  }|| t        j                  <   t	        | j                               }t	        | j                               }|dxx   dt        j                  |      z
  z  cc<   j                  |	|      }
j                  |
j                  t        d |                  }|S )	Nr.   ra   ư>r   )r+   pc                 "    t        | t              S N)ro   str)xs    rY   <lambda>z5get_ames_housing.<locals>.synth_cat.<locals>.<lambda>v  s    As!3 r   r,   )ru   r2   absrk   listkeysvaluessumchoicerp   rv   filter)r   r   n_nullshas_nanr   r   r   r   seriesr!   rP   rQ   s            rY   	synth_catz#get_ames_housing.<locals>.synth_catd  s     i1w;/0&&w'$.>7Q;W}H!)JrvvJOO%&""$%	"rvvay JJt)qJ1%%3T:  
 r   gqu ]?gqh.?gsmB<?g5C(?goEb?)1Fam2fmConDuplexTwnhsTwnhsEra   BldgTypegwD?g. ҥ?g)$;?)UnfRFnFing_9?GarageFinishgW歺?gbFx{?gbFx{?gQfL2rf?)CornerCulDSacFR2FR3	LotConfigg?g/ؗ?gf׽?g$A
?g5e?g() l?g[iF?)TypMin2Min1ModMaj1Maj2Sev
Functionalg M?g?gMq?)NoneBrkFaceStoneBrkCmng3f?
MasVnrTypeg3f?gI/j ?g,	PS˦?ge@?gQ~?gZ	%qv?)1Story2Storyz1.5FinSLvlSFoyerz1.5Unfz2.5Unfz2.5Fin
HouseStyleg$	P?gHp?gK$?gՐ?g4*p?)GdTAFaExPogE`o?FireplaceQugș&l??皙?g5e?gunڌ`?)r  r  r  r  r  	ExterCondgn0a?g{gUId?)r  r  r  r  	ExterQualg8 nV?)r  r  r  g(xߢs?PoolQCr   r   c                    j                  | |      }t        d|z
  z        }t        j                  d|z
        dkD  r,|dkD  r'j	                  |d      }t        j
                  ||<   j                  |t        j                        S )	Nr   r.   ra   r   r   Fr+   replacer,   )r   ru   r2   r   r   rk   rp   rG   )	r   r   r   r   r   null_idxr!   rP   rQ   s	         rY   	synth_numz#get_ames_housing.<locals>.synth_num  s}    JJ3c	J:i1w;/066#- 4'GaKzz)'5zIH&&AhKyy"**y--r   gmtF@gOfK<Q=@	3SsnPorchgݹsΝ?g2Tf?
FireplacesgR u?gP$[r?BsmtHalfBathgvS?g_-?HalfBathgbĈ#F?g+?
GarageCarsg$[Q<@g"$#e?TotRmsAbvGrdg$[Q<{@g%Ǒ|@
BsmtFinSF1ge0OFG@g*Ӛ{7*d@
BsmtFinSF2gNڭ@gCk@	GrLivAreagg6.@gK@ScreenPorchr   g(e@g.A)r   r%   r0   r1   r2   r3   r   r   r   r   r   r   rp   r   rr   shufflezerosro   r-   rv   rs   codesrq   rG   r   r   r   )	rV   r   r  rr   r   r{   r!   rP   rQ   s	         @@@rY   get_ames_housingr  J  s   "   *
))


%CI	BsEz*E12=B	. 	
 		BzN #(;WB~  		
 	B{O !	
 	B| !		
 	B| !		
 	B| "	
 		B}  	
 		B{O  		
 	B{O 	

 	BxL.u .5 .5 .RYY .   24EsKB{O !24FLB|"#79LcRB~24FLBzN !35GMB|"#46H#NB~ !24EsKB| !24FLB| 13DcJB{O!"46H#NB}2::GKK	GB 		|$AZZ bekk2#6#67A''

33AAA	 	QUUW	$$A	affh	&&Aq5Lr   dpathc           	      b   t        j                  d      }d}t        j                  j	                  | d      }t        j                  j                  |      st        j                  ||       t        j                  |d      5 }|j                  |        ddd       |j                  t        j                  j	                  | d      t        j                  j	                  | d	      t        j                  j	                  | d
      fdd      \	  }}}}}	}
}}}|||||	|
|||f	S # 1 sw Y   xY w)zFetch the mq2008 dataset.r   z>https://s3-us-west-2.amazonaws.com/xgboost-examples/MQ2008.zipz
MQ2008.zip)urlfilenamer)pathNzMQ2008/Fold1/train.txtzMQ2008/Fold1/test.txtzMQ2008/Fold1/vali.txtTF)query_id
zero_based)r0   r1   osr%  joinexistsr   urlretrievezipfileZipFile
extractallload_svmlight_files)r   r   srcr   fx_trainy_train	qid_trainx_testy_testqid_testx_validy_valid	qid_valids                 rY   
get_mq2008r;    s#    ""#56H
JCWW\\%.F77>>&!f5		% !	% ! 	$$GGLL 89GGLL 78GGLL 78	

  	% 	
 	
 
/! !s   D%%D.Fr&   )	vary_sizer   n_samples_per_batch	n_batchesuse_cupyr<  r   c                   g }g }g }|r3ddl }	|	j                  j                  t        j                  |            }
nt        j                  j                  |      }
t        |      D ]x  }|r| |dz  z   n| }|
j                  ||      }|
j                  |      }|
j                  dd|      }|j                  |       |j                  |       |j                  |       z |||fS )zMake batches of dense data.r   N
   r.   r(   )	cupyr3   r4   r2   r?   r   randnuniformappend)r=  r"   r>  r?  r<  r   rT   r   wrB  rQ   r   r!   _X_y_ws                   rY   make_batchesrJ  :  s     	A
A
Akk%%bii&=>ii##L19 4='!b&0CV	YYy*-YYy![[QQY[7			 a7Nr   c                   H   e Zd ZU dZej
                  ed<   ej                  e	j                     ed<   ej                  e	j                     ed<   ej                  e	j                     ed<   ej                  e	j                     ed<   ej                  e	j                     ed<   y)		ClickFoldzCA structure containing information about generated user-click data.rT   r   qidscoreclickposN)__name__
__module____qualname____doc__r   
csr_matrix__annotations__nptNDArrayr2   r7   rF   r8    r   rY   rL  rL  [  sp    M
{{288	RXX	;;rzz"";;rxx  	RXX	r   rL  c                   <    e Zd ZU dZeed<   eed<   eed<   defdZy)	RelDataCVzPSimple data struct for holding a train-test split of a learning to rank dataset.r   testmax_relr#   c                      | j                   dk(  S )z6Whether the label consists of binary relevance degree.r.   )r]  selfs    rY   	is_binaryzRelDataCV.is_binaryn  s    ||q  r   N)	rQ  rR  rS  rT  RelDatarV  ru   rO   ra  rY  r   rY   r[  r[  g  s     ZN
ML!4 !r   r[  c                       e Zd ZdZdeddfdZdej                  ej                     dej                  ej                     dej                  ej                     fdZy)	PBMa  Simulate click data with position bias model. There are other models available in
    `ULTRA <https://github.com/ULTR-Community/ULTRA.git>`_ like the cascading model.

    References
    ----------
    Unbiased LambdaMART: An Unbiased Pairwise Learning-to-Rank Algorithm

    etar#   Nc                     t        j                  g d      | _        t        j                  g d      }t        j                  ||      | _        y )N)r  g{Gz?Q?gp=
ף?ra   )
g(\?gQ?gQ?g(\?rg  皙?g)\(?r  g{Gz?gQ?)r2   rK   
click_probpower	exam_prob)r`  re  rk  s      rY   __init__zPBM.__init__}  s8    ((#?@HHH
	 )S1r   labelspositionc                    t        j                  |d      }t        j                  |j                        }d||dk  <   d||t	        | j
                        k\  <   | j
                  |   }t        j                  |j                        }|j                  |j                  k(  sJ t        j                  |d      }d||| j                  j                  k\  <   | j                  |   }t         j                  j                  d      }|j                  |j                  d   t         j                        }t        j                  |j                  t         j                        }d||||z  k  <   |S )	zSample clicks for one query based on input relevance degree and position.

        Parameters
        ----------

        labels :
            relevance_degree

        T)copyr   r   r&   )r+   r-   r,   r.   )r2   rK   r  r   lenri  r+   rk  r3   r   rF   r7   )	r`  rm  rn  ri  rk  ranksrQ   probclickss	            rY   sample_clicks_for_queryzPBM.sample_clicks_for_query  s    &t,XXfll+
vz13vT__--.__V,
HHV\\*	}}+++-.0et~~***+NN5)	ii##D)zzv||AbjjzA(*RXX(N01ti*,,-r   )rQ  rR  rS  rT  r   rl  rW  rX  r2   r7   r8   ru  rY  r   rY   rd  rd  s  s^    2E 2d 2!kk"((+!7:{{2887L!	RXX	!r   rd  r   c           
         t        j                  |       } | j                  }t         j                  dt        j                  t        j
                  | dd | dd d             dz   f   }t        j                  t         j                  ||f         }| |   }t        j                  |t        j                  | j                  g            }|||fS )zzRun length encoding using numpy, modified from:
    https://gist.github.com/nvictus/66627b580c13068589957d6ab0919e66

    r   r.   Nr   T)	equal_nan)	r2   asarrayr+   r_flatnonzeroisclosediffrE  rK   )r   r   startslengthsr   indptrs         rY   rlencoder    s    
 	

1A	AUU1bnnbjj12#2$&O%OPSTTTUFggbeeFAI&'GvYFYYvrxx12F7F""r   rT   r   rM  sample_ratec                    t         j                  j                  d      }t        | j                  d   |z        }t        j
                  d| j                  d   t         j                        }|j                  |       |d| }| |   }||   }||   }	t        j                  |	      }
||
   }||
   }|	|
   }	t        dd      }|j                  |||	       |j                  |       }|S )	zWe use XGBoost to generate the initial score instead of SVMRank for
    simplicity. Sample rate is set to 0.1 by default so that we can test with small
    datasets.

    r&   r   r,   Nz	rank:ndcghist)	objectivetree_method)rM  )r2   r3   r   ru   r   aranger?   r  argsortr   fitpredict)rT   r   rM  r  rQ   r!   indexX_trainr3  r4  
sorted_idxltrscoress                rY   init_rank_scorer    s     ))


%CAGGAJ,-I1aggaj		BEKK*9EhGhGE
I I&Jj!Gj!G*%I
kv
>CGGGW)G, [[^FMr   foldscores_foldc                 4   | \  }}}|j                   t        j                  k(  sJ t        j                  |      }t        j                  |j
                  ft        j                        }t        j                  |j
                  ft        j                        }t        d      }|D ]f  }	|	|k(  }
|
j                  |
j                  d         }
||
   }t        j                  |      ddd   }|||
<   ||
   }|j                  ||      }|||
<   h |j                  d   |j                  d   k(  sJ |j                  |j                  f       |j                  d   |j                  d   k(  sJ |j                  |j                  f       t        ||||||      S )zSimulate clicks for one fold.r,   ra   )re  r   Nr   )r-   r2   r7   uniqueemptyr+   r8   rd  r6   r   r  ru  rL  )r  r  X_foldy_foldqid_foldqidsrn  rt  pbmqqid_maskquery_scoresquery_positionrelevance_degreesquery_clickss                  rY   simulate_one_foldr    so   
  $FFH>>RXX%%%99XDxxbhh7HXXv{{nBHH5F
#,C  
(=##HNN1$56"8,L1$B$7+"8,223DnU'x
( <<?hnnQ//O&,,1OO/<<?fll1o-Kfll/KK-VVX{FHMMr   cv_datac           	      d   t        t        | j                  | j                              \  }}}t	        j
                  dg|D cg c]  }|j                  d    c}z         }t	        j                  |      }t        |      dk(  sJ t        j                  |      }t	        j                  |      }t	        j                  |      }t        |||      }	t        d|j                        D 
cg c]  }
|	||
dz
     ||
     }}
g g g g g g f\  t        |j                  dz
        D ]  }
t        ||
   ||
   ||
   f||
         }j!                  |j"                         j!                  |j$                         j!                  |j&                         j!                  |j(                         j!                  |j*                         j!                  |j,                          t        |j                  dz
        D 
cg c]  }
|
   	 }}
t        d      D ]  }
||
   ||
   k(  j/                         rJ  t              dk(  r(t1        d   d   d   d   d   d         }d}||fS fdt        t                    D        \  }}||fS c c}w c c}
w c c}
w )z6Simulate click data using position biased model (PBM).r   r\   r.   r   Nc           
   3   b   K   | ]&  }t        |   |   |   |   |   |          ( y wr   )rL  ).0r   X_lstc_lstp_lstq_lsts_lsty_lsts     rY   	<genexpr>z"simulate_clicks.<locals>.<genexpr>$  s@      
 eAha%(E!HeAhaQ
s   ,/)r   zipr   r\  r2   rK   r   cumsumrq  r   vstackr   r  r   r+   r  rE  rT   r   rM  rN  rO  rP  allrL  )r  rT   r   rM  vr  X_fully_fullqid_fullscores_fullr   r  r  scores_check_1r   r\  r  r  r  r  r  r  s                   @@@@@@rY   simulate_clicksr     sx   S56IAq# XXqc3AQWWQZ334FYYvFv;%]]1F^^AF~~c"H "&&(;K>CAv{{>STk&Q-&)4TFT/12r2r2/E,E5%u6;;?#  !A$!c!f!5vayATVVTVVTXXTZZ TZZ TXX ).fkkAo(>?1eAh?N?1X 6q!VAY.335556 5zQ%(E!HeAha%(ERSHU $;	
 
3u:&
t $;G 4 U @s   	J#
1J(J-rt  rP  c           
         t        j                  |      }| |   } ||   }||   }||   }t        |      \  }}}t        d|j                        D ]  }||dz
     }	||   }
|	|
k  s	J |	|
f       t        j
                  ||	|
       j                  dk(  s	J |	|
f       ||	|
 }|j                         dk(  sJ |j                                |j                         |j                  dz
  k\  s9J |j                         |j                  |t        j
                  ||	|
       f       t        j                  |      }| |	|
 |   | |	|
 ||	|
 |   ||	|
 ||	|
 |   ||	|
 ||	|
 |   ||	|
  | |||f}|S )z,Sort data based on query index and position.r.   r   )r2   r  r  r   r+   r  minmax)rT   r   rM  rt  rP  r  r  _r   begend	query_posr|   s                rY   sort_ltr_samplesr  +  s    CJ	*AJF
j/C
j/CC=LFAq1fkk" 0QUmQiSy$3*$yyyS&++q0<3*<0CL	}}!#4Y]]_4#}})..1"44 	
MMONNIIc#cl#	7
 	
4 ZZ	*
s3Z
+#c
 S/*5s3s3Z
+#c
3s|J/C+0. faDKr   DTypeDMatrixTdevicec                    t         j                  j                         } | |j                  ddd      j	                  t         j
                        j                  dd            }t        |d      r|j                  dddf   }n	|dddf   }|} ||||	      }t        j                  t        d
      5  t        d|d|       ddd       t        |d      s | |j                         j                  dd            }||k(  j                         sJ |j                  j                   j"                  du sJ |j                  j                   j$                  du sJ |j'                  |j                  	        | |j                         j                  dd            }||j                  k(  j                         sJ |}|j)                  |       |j                         }	|j)                  |j                  d|j*                               |j                         }
|
|	k(  j                         sJ |j	                  t         j,                        }|j)                  |       |j                         }||	k(  j                         sJ |j                  dddd      }t        j                  t        d
      5  |j)                  |       ddd       yy# 1 sw Y   xY w# 1 sw Y   yxY w)zRun tests for base margin.r   ra   d   r/   2   r   ilocN)base_marginz.*base_margin.*r   r  )r  r  FTr.   r   )r2   r3   r   r   rq   rF   r6   hasattrr  r0   r   r   train_fnget_base_marginr  Tflagsc_contiguousf_contiguousset_infoset_base_marginr+   rG   )r  r  r  rQ   rT   r   r  Xygotbm_colbm_rowbm_f64s               rY   run_base_margin_infor  \  s   
))


!CcjjCcj*11"**=EEb!LMAq&FF1a4LadGK	!QK	0B	z);	< @6:B?@ 1fB&&(00Q78{"'')))}}""//5888}}""//4777
.B&&(00B78{}}$))+++ 
;'##%
;..q+2B2BCD##%& %%''' "((4
;'##%& %%''' ii1a+]]:-?@ 	,{+	, 	,7 @ @<	, 	,s   7K7K K K)r   as_densec                     t        t        j                  d      sTt        j                  j                  d      }t	        j                   dz
  |d      }|j                  dd       }||fS t        t        j                               dt        d	t        j                  f fd
}g }t              5 }	t              D ]#  }
|j                  |	j                  ||
             % 	 ddd       g }g }|D ]7  }|j                         \  }}|j                  |       |j                  |       9 t!        |      k(  sJ t	        j"                  |d      }t        j$                  |      }|j'                  |j(                  d   |j(                  d   f      j*                  }t        j,                  |d      }|j(                  d    k(  sJ |j(                  d   k(  sJ |j(                  d    k(  sJ |rR|j/                         }|j(                  d    k(  sJ |j(                  d   k(  sJ t        j0                  ||dk(  <   ||fS ||fS # 1 sw Y   hxY w)zMake sparse matrix.

    Parameters
    ----------

    as_dense:

      Return the matrix as np.ndarray with missing values filled by NaN

    r   r&   ra   csr)mr   r   r   format        r   t_idr#   c                    t         j                  j                  d| z        }
z  }| 
dz
  k(  r	| |z  z
  }n|}t        j                  	|dz
  |      j	                         }t        j
                  	df      }t        |j                  d         D ][  }|j                  |dz      |j                  |   z
  }|dk7  s+||d d |f   j                         |j                  	df      z  dz  z  }] ||fS )Nr&   r.   ra   )r  r   r   r   r   rh  )
r2   r3   r   r   tocscr  r   r   r  toarray)r  rQ   thread_sizen_features_tlocrT   r   r   r+   r"   r!   	n_threadsr   s           rY   
random_cscz*make_sparse_regression.<locals>.random_csc  s   ii##D4K0 I-9q= (4++==O)OMM(N	

 %' 	
 HHi^$qwwqz" 	JA88AE?QXXa[0DqyQq!tW__&YN)CCcII	J
 !tr   )max_workersNr  r   r.   r   )r  r2   r3   r4   r   r   r  multiprocessing	cpu_countru   
csc_matrixr   r   rE  submitresultrq  hstackrx  r6   r   r  r   r  rk   )r!   r"   r   r  rQ   rT   r   r  futuresexecutorr   	X_results	y_resultsr1  r  arrr  s   ```             @rY   make_sparse_regressionr    s=    299m,ii##D)MM(N
 JJ3c	J:!t O--/<I !2!2  . G			2 ;hy! 	;ANN8??:q9:	;; II xxz1
 y>Y&&&#]]9UCC


9A			1771:qwwqz*+--A
qqA99Q<9$$$99Q<:%%%771:"""kkmyy|y(((yy|z)))C1HAv6M;; ;s   >2II!	n_stringsseedc                 >   d}t               }t        j                  j                  |      }t	        |      | k  rZdj                  |j                  t        t        j                        |d            }|j                  |       t	        |      | k  rZt        |      S )zGenerate n unique strings.r    Tr  )setr2   r3   r   rq  r)  r   r   stringascii_lettersadd)r  r  name_lenunique_stringsrQ   
random_strs         rY   unique_random_stringsr    s    H"uN
))


%C
n
	
)WWJJtF001$JO

 	:&	 n
	
) r   r  ra   cpu)r   	cat_ratior  r   	cat_dtyper  n_categoriesonehotr  r  r   c          	         t        j                  d      }
t        j                  j	                  |      }t        j                  j	                  |dz         }|
j                         }t        |      D ]1  }|j                  d|d      d   }|dk(  rt        j                  |t        j                        r4t        j                  t        ||            }|j                  || d      }n*t        j                  d|      }|j                  d||       }|
j                  |d	      |t!        |      <   |t!        |         j"                  j%                  |      |t!        |      <   |j                  d||       }|
j                  ||j&                  	      |t!        |      <   4 t        j(                  | f
      }|j*                  D ]J  }t-        ||   j&                  |
j.                        r|||   j"                  j0                  z  }C|||   z  }L |dz  }|dkD  rt        |      D ]  }|j                  d| dz
  t3        | |z              }t        j4                  |j6                  ||f<   t9        |j:                  j6                  |         sh|t        j<                  |j:                  j6                  |   j>                        j@                  k(  rJ  |jB                  d   |k(  sJ |r|
jE                  |      }|r+tG        |j*                        }|jI                  |       ||   }|	dk7  r0|	dv sJ ddl%}ddl&}|jO                  |      }|j                  |      }||fS )a/  Generate categorical features for test.

    Parameters
    ----------
    n_categories:
        Number of categories for categorical features.
    onehot:
        Should we apply one-hot encoding to the data?
    sparsity:
        The ratio of the amount of missing values over the number of all entries.
    cat_ratio:
        The ratio of features that are categorical.
    shuffle:
        Whether we should shuffle the columns.
    cat_dtype :
        The dtype for categorical features, might be string or numeric.

    Returns
    -------
    X, y
    r%   r.   r/   r   Tr  r(   rb   r,   r  r  r  )cudagpuN)(r0   r1   r2   r3   r4   r   r   rM   
issubdtypestr_rK   r  r   r  r5   rp   r   rs   set_categoriesr-   r  rr   ro   rv   r  ru   rk   r  r   rS   r  
categoriesr+   r   get_dummiesr   r  cudfrB  from_pandas)r!   r"   r  r  r   r  r  r   r   r  rP   rQ   row_rngrV   r   r   r	  r{   numlabelcolr  rr   r  rB  s                            rY   make_categoricalr    s   D 
		X	&B ))


-Cii##L1$45G	B: 9a3A6Q;}}Y0  XX&;L!&LM
NN:ItNLYYq,7
OO9OM1J7Bs1vJCF66zBBs1vJ//al/KC3cii8Bs1vJ!9$ HHI<(Ezz bgmmR%8%89RW[[&&&ERWE	
 
QJE#~z" 	TAOOIMI4H0I $ E !#BGGE1Hryy~~a01#ryy1B1M1M'N'S'SSSS	T 88A;*$$$^^Brzz" [(((b!

5!u9r   c                        e Zd ZdZddddededee   dee   d	ed
ee   ddf fdZ	de
defdZddZdeeej                   ej$                  f   eee   f   fdZ xZS )IteratorForTestzCIterator for testing streaming DMatrix. (external memory, quantile)FN)on_hostmin_cache_page_bytesrT   r   rF  cacher  r  r#   c                    t        |      t        |      k(  sJ || _        || _        || _        d| _        t
        |   |||       y )Nr   )cache_prefixr  r  )rq  rT   r   rF  itsuperrl  )r`  rT   r   rF  r  r  r  	__class__s          rY   rl  zIteratorForTest.__init__T  sR     1vQ!5 	 	
r   
input_datac                 |   | j                   t        | j                        k(  ryt        j                  t
        d      5   || j                  | j                      | j                  | j                      d        d d d         || j                  | j                      j                         | j                  | j                      j                         | j                  r'| j                  | j                      j                         nd        t        j                          | xj                   dz  c_         y# 1 sw Y   xY w)NFzKeyword argumentr   )r|   r  weightr.   T)r  rq  rT   r0   r   	TypeErrorr   rp  rF  gccollect)r`  r  s     rY   nextzIteratorForTest.nexti  s    77c$&&k!]]9,>? 	?tvvdggtww>	? 	%%'&&/&&(-1VV466$''?'')	

 	

1	? 	?s   9D22D;c                     d| _         y )Nr   )r  r_  s    rY   resetzIteratorForTest.resetz  s	    r   c                    t        | j                  d   t        j                        r"t        j                  | j                  d      }n!t        j                  | j                  d      }t        j                  | j                  d      }| j                  r"t        j                  | j                  d      }nd}|||fS )zReturn concatenated arrays.r   r  r  r   N)	ro   rT   r   rU  r  r2   r   r   rF  )r`  rT   r   rF  s       rY   	as_arrayszIteratorForTest.as_arrays}  s     dffQi!2!23dffU3AtvvA.ANN466*66tvvA.AA!Qwr   )r#   N)rQ  rR  rS  rT  r   r   r   rO   ru   rl  r   r"  r$  r   r   r2   r   r   rU  r   r&  __classcell__)r  s   @rY   r  r  Q  s    M .2

 
 H	
 }
 
 'sm
 

*x D "	uRZZ!2!223Y@SS	Tr   r  )F)r  )\rT  r   r  r(  r  r,  concurrent.futuresr   dataclassesr   r   r   r   r   r   r	   r
   r   r   r   r   r   r   r   urllibr   numpyr2   r0   rW  numpy.randomRNGscipyr   corer   r   r   r|   r   r   sklearnr   r   trainingr   r  r%   r   
DataFrameTr1   r   Memorymemoryru   r   r   rZ   r}   r   r   r  r   r   r   r   r  r   rU  r;  rO   rJ  rX  r7   rb  rL  r[  rd  r  r   rF   r  r  r  r8   r  r  r  r  	DTypeLiker  r  rY  r   rY   <module>r6     s   $ 	  	   1 !        )  5 5 9 * (.J			X	&	|Q	/88 #8uU2::rzz12E$*4EEFdRS8v>9 >B7 7t
3 
4 
 4bjj"**&< = 4 4n "E"**bjj01 " " 8E"**bjj01 8 8 E"**bjj01    s%
BJJ 67 s sl 55

JJJJ
JJJJ
JJJJ
5 5x 	   	   4

T"**-tBJJ/??@< !!3;;rxx#8#++bhh:OO
P   	!
 	!4 4n#BHH% #%S[[#++0U*V #& 	!!
{{288! 
RXX	! 	!
 	[[!HN
!!3;;rxx#8#++bhh:OO
PNRZZ(N NF(Y (5HY<O1O+P (V..
{{288. 
RXX	. KK!	.
 
RXX	. 
KKKKKK.b+, +,DM +,3 +,SW +,^ RR #R/4R@DR
5""#RZZ/0R Rj S    S	  , %'XX]]] ]
 ] ] ] ] ] yy""] ] 9bjj !]@9h 9r   