
    (^i                        d Z ddlmZ ddlZddlmZ ddlmZ  e       5 Z	ddl
mZ ddd        G d d      Zy# 1 sw Y   xY w)	a  An implementation of `An Efficient Approach for Assessing Hyperparameter Importance`.

See http://proceedings.mlr.press/v32/hutter14.pdf and https://automl.github.io/fanova/cite.html
for how to cite the original work.

This implementation is inspired by the efficient algorithm in
`fanova` (https://github.com/automl/fanova) and
`pyrfr` (https://github.com/automl/random_forest_run) by the original authors.

Differences include relying on scikit-learn to fit random forests
(`sklearn.ensemble.RandomForestRegressor`) and that it is otherwise written entirely in Python.
This stands in contrast to the original implementation which is partially written in C++.
Since Python runtime overhead may become noticeable, included are instead several
optimizations, e.g. vectorized NumPy functions to compute the marginals, instead of keeping all
running statistics. Known cases include assessing categorical features with a larger
number of choices since each choice is given a unique one-hot encoded raw feature.
    )annotationsN)
try_import)_FanovaTree)RandomForestRegressorc                  X    e Zd Z	 	 	 	 	 	 	 	 	 	 	 	 ddZ	 	 	 	 	 	 	 	 	 	 ddZddZd	dZy)
_Fanovac                    t         j                          t        |||||      | _        d | _        d | _        d | _        y )N)n_estimators	max_depthmin_samples_splitmin_samples_leafrandom_state)_importscheckr   _forest_trees
_variances_column_to_encoded_columns)selfn_treesr   r   r   seeds         b/var/www/html/hubwallet-dev/venv/lib/python3.12/site-packages/optuna/importance/_fanova/_fanova.py__init__z_Fanova.__init__    sA     	, /-
 158<CG'    c                   |j                   d   |j                   d   k(  sJ |j                   d   |j                   d   k(  sJ |j                   d   dk(  sJ | j                  j                  ||       | j                  j                  D cg c]  }t	        |j
                  |       c}| _        || _        i | _        t        d | j                  D              rt        d      y c c}w )Nr         c              3  :   K   | ]  }|j                   d k(    yw)r   N)variance).0trees     r   	<genexpr>z_Fanova.fit.<locals>.<genexpr>F   s     :dt}}!:s   z-Encountered zero total variance in all trees.)shaper   fitestimators_r   tree_r   r   r   allRuntimeError)r   Xysearch_spacescolumn_to_encoded_columnses         r   r$   z_Fanova.fit5   s     wwqzQWWQZ'''wwqz]003333""1%***ADHLLD\D\]q{177M:]*C':dkk:: NOO ;	 ^s   C&c                   | j                   J | j                  J | j                  |       g }t        | j                         D ]B  \  }}|j                  }|dkD  s| j                  |   |   |z  }t        j                  ||      }D t        j                  |      }t        |j                               t        |j                               fS )N        )r   r   _compute_variances	enumerater   npappendasarrayfloatmeanstd)r   feature	fractions
tree_indexr!   tree_variancefractions          r   get_importancez_Fanova.get_importanceK   s    {{&&&***(.0	 )$++ 6 	;J MMMs"??73J?-OIIi:			; JJy)	Y^^%&immo(>>>r   c                   | j                   J | j                  J | j                  J || j                  v ry | j                  |   }t        j                  t        | j                         t        j                        }t        | j                         D ]0  \  }}|j                  |      }t        j                  |dd       ||<   2 || j                  |<   y )N)dtyper/   )
r   r   r   r2   emptylenfloat64r1   get_marginal_varianceclip)r   r8   raw_features	variancesr:   r!   marginal_variances          r   r0   z_Fanova._compute_variances^   s    {{&&&***..:::doo%66w?HHS-RZZ@	 )$++ 6 	JJ $ : :< H$&GG,=sD$IIj!	J $- r   N)r   intr   rH   r   int | floatr   rI   r   z
int | NonereturnNone)
r)   
np.ndarrayr*   rL   r+   rL   r,   zlist[np.ndarray]rJ   rK   )r8   rH   rJ   ztuple[float, float])r8   rH   rJ   rK   )__name__
__module____qualname__r   r$   r=   r0    r   r   r   r      s    HH H '	H
 &H H 
H*PP P "	P
 $4P 
P,?&-r   r   )__doc__
__future__r   numpyr2   optuna._importsr   optuna.importance._fanova._treer   r   sklearn.ensembler   r   rP   r   r   <module>rW      sE   $ #  & 7 \ 7X67M- M-	7 7s	   :A