
    ^i              	       h   d Z ddlZddlZddlZddlZddlZddlmZ ddlm	Z	m
Z
mZmZmZmZmZ ddlZddlmZmZmZmZmZ ddlmZ ddlmZ dd	lmZ dd
lmZ ddlmZ ddl m!Z! ddl"m#Z# ddl$m%Z% dede&fdZ'de
dee&   dee&e	f   fdZ( G d de      Zd3de&de)de)defdZ*dede)defdZ+dede&fdZ,defdZ-d4d e&d!eee&e)f      dej\                  fd"Z/d e&dee)   fd#Z0d$ede)fd%Z1d$ede2fd&Z3dede2fd'Z4d(ede)fd)Z5de&fd*Z6d+e&d,e
g e#f   de#fd-Z7d.e!de&fd/Z8d+e&de!fd0Z9d1ee&   de2fd2Z:y)5z;Xgboost pyspark integration submodule for helper functions.    N)Thread)AnyCallableDictOptionalSetTypeUnion)BarrierTaskContext	SparkConfSparkContext
SparkFilesTaskContext)SparkSession   )CommunicatorContext)Config)_Args)_ArgVals)Booster)XGBModel)RabitTrackerclsreturnc                 8    | j                    d| j                   S )zReturn the class name..)
__module____name__)r   s    T/var/www/html/hubwallet-dev/venv/lib/python3.12/site-packages/xgboost/spark/utils.pyget_class_namer       s    nnQs||n--    funcunsupported_setc                     t        j                  |       }i }|j                  j                         D ]C  }|j                  |j
                  us|j                  |vs+|j                  ||j                  <   E |S )zReturns a dictionary of parameters and their default value of function fn.  Only
    the parameters with a default value will be included.

    )inspect	signature
parametersvaluesdefaultemptyname)r"   r#   sigfiltered_params_dict	parameters        r   _get_default_params_from_funcr/      su     

D
!C^^**, E	 Y__4o53<3D3D 0E  r!   c                   0     e Zd ZdZdededdf fdZ xZS )r   z&Context with PySpark specific task ID.contextargsr   Nc                 \    t        |j                               |d<   t        |   di | y )Ndmlc_task_id )strpartitionIdsuper__init__)selfr1   r2   	__class__s      r   r9   zCommunicatorContext.__init__5   s+    "7#6#6#89^ 4 r!   )r   r   __qualname____doc__r   CollArgsValsr9   __classcell__)r;   s   @r   r   r   2   s&    0! 2 !L !T ! !r!   r   host	n_workersportc                     d|i}t        || d|      }|j                          t        |j                        }d|_        |j                          |j                  |j                                |S )z"Start Rabit tracker with n_workersrA   task)rA   host_ipsortbyrB   )targetT)r   startr   wait_fordaemonupdateworker_args)r@   rA   rB   r2   trackerthreads         r   _start_trackerrO   :   s`    !9-DYVRVWGMMO7++,FFM
LLNKK##%&Kr!   confc                     | j                   J | j                  dn| j                  }t        | j                   ||      }|S )z3Get rabit context arguments to send to each worker.r   )tracker_host_iptracker_portrO   )rP   rA   rB   envs       r   _get_rabit_argsrU   F   sE    +++!!)1t/@/@D
--y$
?CJr!   r1   c                     | j                         D cg c]   }|j                  j                  d      d   " }}|d   S c c}w )zLGets the hostIP for Spark. This essentially gets the IP of the first worker.:r   )getTaskInfosaddresssplit)r1   infotask_ip_lists      r   _get_host_ipr]   N   sB    ;B;O;O;QR4DLL&&s+A.RLR? Ss   %?c                      t         j                  j                         t        d      t        j
                  j                         S )z`Get or create spark session. Note: This function can only be invoked from driver
    side.

    z<_get_spark_session should not be invoked from executor side.)pysparkr   getRuntimeErrorr   buildergetOrCreater5   r!   r   _get_spark_sessionrd   T   s@    
  ,J
 	
 ++--r!   r+   levelc                    t        j                  |       }||j                  |       n<|j                  t         j                  k(  r|j                  t         j
                         |j                  sxt        j                         j                  sZt        j                  t        j                        }t        j                  d      }|j                  |       |j                  |       |S )zGGets a logger by name, or creates and configures it for the first time.z<%(asctime)s %(levelname)s %(name)s: %(funcName)s %(message)s)logging	getLoggersetLevelre   NOTSETINFOhandlersStreamHandlersysstderr	FormattersetFormatter
addHandler)r+   re   loggerhandler	formatters        r   
get_loggerrv   a   s    t$F <<7>>)OOGLL)??7#4#4#6#?#?''

3%%J
	 	Y''"Mr!   c                     t        j                  |       }|j                  t         j                  k(  rdS |j                  S )z+Get the logger level for the given log nameN)rg   rh   re   rj   )r+   rs   s     r   get_logger_levelrx   u   s0    t$F<<7>>14Cv||Cr!   spark_contextc                 d   | j                   j                         j                         dk\  r^| j                   j                         j                  | j                   j                         j	                         j                  d            S | j                   j                         j                         S )z0Gets the current max number of concurrent tasks.z3.1r   )_jscscversionmaxNumConcurrentTasksresourceProfileManagerresourceProfileFromIdry   s    r   _get_max_num_concurrent_tasksr   {   s     &&(E1!!$$&<<!!#::<RRSTU
 	
   "88::r!   c                 R    | j                   j                         j                         S )zWhether it is Spark local mode)r{   r|   isLocalr   s    r   	_is_localr      s"       "**,,r!   c                 x    | j                  d      }|d uxr$ |j                  d      xs |j                  d      S )Nzspark.masterzspark://zlocal-cluster)r`   
startswith)rP   masters     r   _is_standalone_or_localclusterr      s@    XXn%F *%K):):?)Kr!   task_contextc                     | t        d      | j                         }d|vrt        d      t        |d   j                  d   j	                               S )z&Get the gpu id from the task resourcesz3_get_gpu_id should not be invoked from driver side.gpuzDCouldn't get the gpu id, Please check the GPU resource configurationr   )ra   	resourcesint	addressesstrip)r   r   s     r   _get_gpu_idr      s`    PQQ&&(IIR
 	
 y))!,22455r!   c                      t        j                         } t        j                  j	                  | d      }t        j                  j                  |      st        j                  |       |S )Nzxgboost-tmp)r   getRootDirectoryospathjoinexistsmakedirs)root_dirxgb_tmp_dirs     r   _get_or_create_tmp_dirr      sF    **,H'',,x7K77>>+&
K r!   modelxgb_model_creatorc                 f     |       }|j                  t        | j                  d                   |S )zH
    Deserialize an xgboost.XGBModel instance from the input model.
    utf-8)
load_model	bytearrayencode)r   r   	xgb_models      r   deserialize_xgb_modelr      s.     "#I5<<#89:r!   boosterc                    t         j                  j                  t               t	        j
                          d      }| j                  |       t        |d      5 }|j                         }ddd       |S # 1 sw Y   S xY w)z
    Serialize the input booster to a string.

    Parameters
    ----------
    booster:
        an xgboost.core.Booster instance
    .jsonr   encodingN)	r   r   r   r   uuiduuid4
save_modelopenread)r   tmp_file_namefser_model_strings       r   serialize_boosterr      sk     GGLL!7!9djjl^5;QRM}%	mg	. $!668$$s   A77Bc                    t               }t        j                  j                  t	               t        j                          d      }t        |dd      5 }|j                  |        ddd       |j                  |       |S # 1 sw Y   xY w)zN
    Deserialize an xgboost.core.Booster from the input ser_model_string.
    r   wr   r   N)
r   r   r   r   r   r   r   r   writer   )r   r   r   r   s       r   deserialize_boosterr      sq     iGGGLL!7!9djjl^5;QRM	mS7	3 q	}%N s   BBdevicec                 
    | dv S )z&Whether xgboost is using CUDA workers.)cudar   r5   )r   s    r   use_cudar      s    _$$r!   )r   )N);r=   r%   rg   r   rn   r   	threadingr   typingr   r   r   r   r   r	   r
   r_   r   r   r   r   r   pyspark.sql.sessionr   
collectiver   CCtxr   r   CollArgsr   r>   corer   sklearnr   rM   r   r6   r    r/   r   rO   rU   r]   rd   Loggerrv   rx   r   boolr   r   r   r   r   r   r   r   r5   r!   r   <module>r      s   A   	 
   B B B  X X , 4  * 1   ". . .
 
 %(X 	#s(^ &!$ !	 	 	C 	 	& S X ,  
.L 
.S %S/!: gnn (D3 D8C= D; ;# ;-\ -d - t 6k 6c 6 #+BL#9w 3 "
s 
w 
%Xc] %t %r!   